sitemap_generator 0.2.6 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +0 -1
- data/Rakefile +28 -16
- data/VERSION +1 -1
- data/lib/sitemap_generator/builder/helper.rb +10 -0
- data/lib/sitemap_generator/builder/sitemap_file.rb +114 -0
- data/lib/sitemap_generator/builder/sitemap_index_file.rb +24 -0
- data/lib/sitemap_generator/builder.rb +9 -0
- data/lib/sitemap_generator/interpreter.rb +28 -0
- data/lib/sitemap_generator/link.rb +28 -23
- data/lib/sitemap_generator/link_set.rb +132 -125
- data/lib/sitemap_generator/templates.rb +1 -3
- data/lib/sitemap_generator/utilities.rb +28 -3
- data/lib/sitemap_generator.rb +7 -7
- data/tasks/sitemap_generator_tasks.rake +3 -15
- metadata +9 -8
- data/lib/sitemap_generator/helper.rb +0 -55
- data/lib/sitemap_generator/rails_helper.rb +0 -28
- data/templates/sitemap_index.builder +0 -23
- data/templates/xml_sitemap.builder +0 -38
data/README.md
CHANGED
@@ -192,7 +192,6 @@ Notes
|
|
192
192
|
Known Bugs
|
193
193
|
========
|
194
194
|
|
195
|
-
- Sitemaps.org [states][sitemaps_org] that no Sitemap XML file should be more than 10Mb uncompressed. The plugin will warn you about this, but does nothing to avoid it (like move some URLs into a later file).
|
196
195
|
- There's no check on the size of a URL which [isn't supposed to exceed 2,048 bytes][sitemaps_xml].
|
197
196
|
- Currently only supports one Sitemap Index file, which can contain 50,000 Sitemap files which can each contain 50,000 urls, so it _only_ supports up to 2,500,000,000 (2.5 billion) urls. I personally have no need of support for more urls, but plugin could be improved to support this.
|
198
197
|
|
data/Rakefile
CHANGED
@@ -22,6 +22,28 @@ rescue LoadError
|
|
22
22
|
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
23
23
|
end
|
24
24
|
|
25
|
+
#
|
26
|
+
# Helper methods
|
27
|
+
#
|
28
|
+
module Helpers
  extend self

  # Resolve the path fragment <tt>path</tt> against the directory that
  # contains this Rakefile and return the joined path.
  def local_path(path)
    rakefile_dir = File.dirname(__FILE__)
    File.join(rakefile_dir, path)
  end

  # Remove <tt>path</tt> entirely, recreate it, then copy the project's
  # capitalised top-level files and its standard directories into it.
  def prepare_path(path)
    rm_rf path
    mkdir_p path
    sources = FileList["[A-Z]*", "{bin,lib,rails,templates,tasks}"]
    cp_r(sources, path)
  end
end
|
43
|
+
|
44
|
+
#
|
45
|
+
# Tasks
|
46
|
+
#
|
25
47
|
task :default => :test
|
26
48
|
|
27
49
|
namespace :test do
|
@@ -37,29 +59,19 @@ namespace :test do
|
|
37
59
|
namespace :prepare do
|
38
60
|
task :gem do
|
39
61
|
ENV["SITEMAP_RAILS"] = 'gem'
|
40
|
-
prepare_path(local_path('spec/mock_app_gem/vendor/gems/sitemap_generator-1.2.3'))
|
41
|
-
rm_rf(local_path('spec/mock_app_gem/public/sitemap*'))
|
62
|
+
Helpers.prepare_path(Helpers.local_path('spec/mock_app_gem/vendor/gems/sitemap_generator-1.2.3'))
|
63
|
+
rm_rf(Helpers.local_path('spec/mock_app_gem/public/sitemap*'))
|
42
64
|
end
|
43
65
|
|
44
66
|
task :plugin do
|
45
67
|
ENV["SITEMAP_RAILS"] = 'plugin'
|
46
|
-
prepare_path(local_path('spec/mock_app_plugin/vendor/plugins/sitemap_generator-1.2.3'))
|
47
|
-
rm_rf(local_path('spec/mock_app_plugin/public/sitemap*'))
|
68
|
+
Helpers.prepare_path(Helpers.local_path('spec/mock_app_plugin/vendor/plugins/sitemap_generator-1.2.3'))
|
69
|
+
rm_rf(Helpers.local_path('spec/mock_app_plugin/public/sitemap*'))
|
48
70
|
end
|
49
71
|
|
50
72
|
task :rails3 do
|
51
73
|
ENV["SITEMAP_RAILS"] = 'rails3'
|
52
|
-
rm_rf(local_path('spec/mock_rails3_gem/public/sitemap*'))
|
53
|
-
end
|
54
|
-
|
55
|
-
def local_path(path)
|
56
|
-
File.join(File.dirname(__FILE__), path)
|
57
|
-
end
|
58
|
-
|
59
|
-
def prepare_path(path)
|
60
|
-
rm_rf path
|
61
|
-
mkdir_p path
|
62
|
-
cp_r(FileList["[A-Z]*", "{bin,lib,rails,templates,tasks}"], path)
|
74
|
+
rm_rf(Helpers.local_path('spec/mock_rails3_gem/public/sitemap*'))
|
63
75
|
end
|
64
76
|
end
|
65
77
|
end
|
@@ -86,4 +98,4 @@ Rake::RDocTask.new(:rdoc) do |rdoc|
|
|
86
98
|
rdoc.options << '--line-numbers' << '--inline-source'
|
87
99
|
rdoc.rdoc_files.include('README.md')
|
88
100
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
89
|
-
end
|
101
|
+
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.6
|
1
|
+
0.3.0
|
@@ -0,0 +1,114 @@
|
|
1
|
+
require 'sitemap_generator/builder/helper'
|
2
|
+
require 'builder'
|
3
|
+
require 'zlib'
|
4
|
+
|
5
|
+
module SitemapGenerator
  module Builder
    # Collects the XML for individual links in memory and, once finalized,
    # writes them wrapped in a <urlset> element to a single gzipped file.
    class SitemapFile
      include SitemapGenerator::Builder::Helper

      attr_accessor :sitemap_path, :public_path, :filesize, :link_count, :hostname

      # <tt>public_path</tt> full path of the directory to write sitemaps in.
      #   Usually your Rails <tt>public/</tt> directory.
      #
      # <tt>sitemap_path</tt> relative path including filename of the sitemap
      #   file relative to <tt>public_path</tt>
      #
      # <tt>hostname</tt> hostname including protocol to use in all links
      #   e.g. http://en.google.ca
      def initialize(public_path, sitemap_path, hostname)
        self.sitemap_path = sitemap_path
        self.public_path = public_path
        self.hostname = hostname
        self.link_count = 0

        @xml_content = '' # XML urlset content
        # A <urlset> document is validated against sitemap.xsd; siteindex.xsd
        # is the schema for <sitemapindex> documents.
        @xml_wrapper_start = %q[<?xml version="1.0" encoding="UTF-8"?><urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">]
        @xml_wrapper_end = %q[</urlset>]
        # The uncompressed size starts out as the size of the empty wrapper.
        self.filesize = @xml_wrapper_start.bytesize + @xml_wrapper_end.bytesize
      end

      # Return the modification time of the written file, or nil when it
      # cannot be determined (e.g. the file has not been written yet).
      def lastmod
        File.mtime(self.full_path)
      rescue StandardError
        nil
      end

      # Return true when no links have been added.
      def empty?
        self.link_count == 0
      end

      # Return the sitemap's URL: <tt>sitemap_path</tt> joined onto <tt>hostname</tt>.
      def full_url
        URI.join(self.hostname, self.sitemap_path).to_s
      end

      # Return the filesystem path of the sitemap file:
      # <tt>public_path</tt> joined with <tt>sitemap_path</tt> (memoized).
      def full_path
        @full_path ||= File.join(self.public_path, self.sitemap_path)
      end

      # Return a boolean indicating whether the sitemap file can fit another link
      # of <tt>bytes</tt> bytes in size.
      def file_can_fit?(bytes)
        (self.filesize + bytes) < SitemapGenerator::MAX_SITEMAP_FILESIZE && self.link_count < SitemapGenerator::MAX_SITEMAP_LINKS
      end

      # Add a link to the sitemap file and return a boolean indicating whether the
      # link was added.
      #
      # If a link cannot be added, the file is too large or the link limit has been
      # reached. In that case the file is finalized (written out) and false is returned.
      def add_link(link)
        xml = build_xml(::Builder::XmlMarkup.new, link)
        unless file_can_fit?(xml.bytesize)
          self.finalize!
          return false
        end

        @xml_content << xml
        self.filesize += xml.bytesize
        self.link_count += 1
        true
      end
      alias_method :<<, :add_link

      # Build the <url> element for <tt>link</tt> with <tt>builder</tt>.
      #
      # Return XML as a String
      def build_xml(builder, link)
        builder.url do
          builder.loc link[:loc]
          builder.lastmod w3c_date(link[:lastmod]) if link[:lastmod]
          builder.changefreq link[:changefreq] if link[:changefreq]
          builder.priority link[:priority] if link[:priority]

          unless link[:images].blank?
            link[:images].each do |image|
              builder.image :image do
                builder.image :loc, image[:loc]
                builder.image :caption, image[:caption] if image[:caption]
                builder.image :geo_location, image[:geo_location] if image[:geo_location]
                builder.image :title, image[:title] if image[:title]
                builder.image :license, image[:license] if image[:license]
              end
            end
          end
        end
        # Appending an empty string returns the builder's output for the caller.
        builder << ''
      end

      # Insert the content into the XML "wrapper" and write and close the file.
      #
      # All the xml content in the instance is cleared, but attributes like
      # <tt>filesize</tt> are still available.
      #
      # Does nothing when the instance is already frozen (i.e. already finalized).
      def finalize!
        return if self.frozen?

        # The block form opens the file in binary mode and closes the gzip
        # stream and the underlying file even if one of the writes raises.
        Zlib::GzipWriter.open(self.full_path) do |gz|
          gz.write @xml_wrapper_start
          gz.write @xml_content
          gz.write @xml_wrapper_end
        end
        @xml_content = @xml_wrapper_start = @xml_wrapper_end = ''
        self.freeze
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module SitemapGenerator
  module Builder
    # A SitemapFile whose content is wrapped in a <sitemapindex> element and
    # whose entries reference sitemap files rather than pages.
    class SitemapIndexFile < SitemapFile

      # Takes the same arguments as SitemapFile#initialize, then replaces the
      # XML wrapper with the <sitemapindex> one and recomputes the initial
      # file size from it.
      def initialize(*args)
        super(*args)

        @xml_content = '' # XML sitemapindex content
        @xml_wrapper_start = %q[<?xml version="1.0" encoding="UTF-8"?><sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">]
        @xml_wrapper_end = %q[</sitemapindex>]
        self.filesize = @xml_wrapper_start.bytesize + @xml_wrapper_end.bytesize
      end

      # Build the index entry for <tt>link</tt> with <tt>builder</tt>. Children
      # of <sitemapindex> are <sitemap> elements, not <url> elements.
      #
      # Return XML as a String
      def build_xml(builder, link)
        builder.sitemap do
          builder.loc link[:loc]
          builder.lastmod w3c_date(link[:lastmod]) if link[:lastmod]
        end
        # Appending an empty string returns the builder's output for the caller.
        builder << ''
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module SitemapGenerator

  # Evaluate a sitemap config file within the context of a class that includes the
  # Rails URL helpers.
  class Interpreter

    if SitemapGenerator::Utilities.rails3?
      include ::Rails.application.routes.url_helpers
    else
      require 'action_controller'
      include ActionController::UrlWriter
    end

    # Read and evaluate the sitemap config file in the context of this instance.
    #
    # <tt>sitemap_config_file</tt> (optional) path to the config file.
    #   Defaults to <tt>config/sitemap.rb</tt> under the Rails root.
    def initialize(sitemap_config_file=nil)
      sitemap_config_file ||= File.join(::Rails.root, 'config/sitemap.rb')
      # File.read closes the file handle (open(...).read left it open).
      # Passing the file name makes errors raised by the config report the
      # config file's path and line rather than "(eval)".
      eval(File.read(sitemap_config_file), binding, sitemap_config_file.to_s)
    end

    # KJV do we need this?  We should be using path_* helpers.
    # def self.default_url_options(options = nil)
    #   { :host => SitemapGenerator::Sitemap.default_host }
    # end

    # Evaluate the default sitemap config file by creating a new instance.
    def self.run
      new
    end
  end
end
|
@@ -1,30 +1,35 @@
|
|
1
1
|
module SitemapGenerator
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
:changefreq => options[:changefreq],
|
11
|
-
:lastmod => options[:lastmod],
|
12
|
-
:host => options[:host],
|
13
|
-
:loc => URI.join(options[:host], path).to_s,
|
14
|
-
:images => prepare_images(options[:images], options[:host])
|
15
|
-
}
|
2
|
+
module Link
|
3
|
+
extend self
|
4
|
+
|
5
|
+
# Return a Hash of options suitable to pass to a SitemapGenerator::Builder::SitemapFile instance.
|
6
|
+
def generate(path, options = {})
|
7
|
+
if path.is_a?(SitemapGenerator::Builder::SitemapFile)
|
8
|
+
options.reverse_merge!(:host => path.hostname, :lastmod => path.lastmod)
|
9
|
+
path = path.sitemap_path
|
16
10
|
end
|
17
11
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
12
|
+
options.assert_valid_keys(:priority, :changefreq, :lastmod, :host, :images)
|
13
|
+
options.reverse_merge!(:priority => 0.5, :changefreq => 'weekly', :lastmod => Time.now, :host => Sitemap.default_host, :images => [])
|
14
|
+
{
|
15
|
+
:path => path,
|
16
|
+
:priority => options[:priority],
|
17
|
+
:changefreq => options[:changefreq],
|
18
|
+
:lastmod => options[:lastmod],
|
19
|
+
:host => options[:host],
|
20
|
+
:loc => URI.join(options[:host], path).to_s,
|
21
|
+
:images => prepare_images(options[:images], options[:host])
|
22
|
+
}
|
23
|
+
end
|
24
|
+
|
25
|
+
# Return an Array of image option Hashes suitable to be parsed by SitemapGenerator::Builder::SitemapFile
|
26
|
+
def prepare_images(images, host)
|
27
|
+
images.delete_if { |key,value| key[:loc] == nil }
|
28
|
+
images.each do |r|
|
29
|
+
r.assert_valid_keys(:loc, :caption, :geo_location, :title, :license)
|
30
|
+
r[:loc] = URI.join(host, r[:loc]).to_s
|
27
31
|
end
|
32
|
+
images[0..(SitemapGenerator::MAX_SITEMAP_IMAGES-1)]
|
28
33
|
end
|
29
34
|
end
|
30
35
|
end
|
@@ -1,166 +1,173 @@
|
|
1
1
|
require 'builder'
|
2
2
|
require 'action_view'
|
3
3
|
|
4
|
+
# A LinkSet provisions a bunch of links to sitemap files. It also writes the index file
|
5
|
+
# which lists all the sitemap files written.
|
4
6
|
module SitemapGenerator
|
5
7
|
class LinkSet
|
6
|
-
include
|
7
|
-
include ActionView::Helpers::NumberHelper
|
8
|
-
|
9
|
-
attr_accessor :default_host, :yahoo_app_id, :links
|
10
|
-
attr_accessor :sitemaps
|
11
|
-
attr_accessor :max_entries
|
12
|
-
attr_accessor :link_count
|
13
|
-
|
14
|
-
alias :sitemap_files :sitemaps
|
15
|
-
|
16
|
-
# Create new link set instance.
|
17
|
-
def initialize
|
18
|
-
self.links = []
|
19
|
-
self.sitemaps = []
|
20
|
-
self.max_entries = SitemapGenerator::MAX_ENTRIES
|
21
|
-
self.link_count = 0
|
22
|
-
end
|
8
|
+
include ActionView::Helpers::NumberHelper # for number_with_delimiter
|
23
9
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
links.push Link.generate("/#{index_file}", :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
|
28
|
-
self.link_count += 2
|
29
|
-
end
|
10
|
+
attr_accessor :default_host, :public_path, :sitemaps_path
|
11
|
+
attr_accessor :sitemap, :sitemaps, :sitemap_index
|
12
|
+
attr_accessor :verbose, :yahoo_app_id
|
30
13
|
|
31
|
-
#
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
14
|
+
# Evaluate the sitemap config file and write all sitemaps.
|
15
|
+
#
|
16
|
+
# This should be refactored so that we can have multiple instances
|
17
|
+
# of LinkSet.
|
18
|
+
def create
|
19
|
+
require 'sitemap_generator/interpreter'
|
37
20
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
self.link_count += 1
|
43
|
-
end
|
21
|
+
start_time = Time.now
|
22
|
+
SitemapGenerator::Interpreter.run
|
23
|
+
finalize!
|
24
|
+
end_time = Time.now
|
44
25
|
|
45
|
-
|
46
|
-
def write
|
47
|
-
write_pending
|
26
|
+
puts "\nSitemap stats: #{number_with_delimiter(self.link_count)} links / #{self.sitemaps.size} files / " + ("%dm%02ds" % (end_time - start_time).divmod(60)) if verbose
|
48
27
|
end
|
49
28
|
|
50
|
-
#
|
51
|
-
|
52
|
-
|
29
|
+
# <tt>public_path</tt> (optional) full path to the directory to write sitemaps in.
|
30
|
+
# Defaults to your Rails <tt>public/</tt> directory.
|
31
|
+
#
|
32
|
+
# <tt>sitemaps_path</tt> (optional) path fragment within public to write sitemaps
|
33
|
+
# to e.g. 'en/'. Sitemaps are written to <tt>public_path</tt> + <tt>sitemaps_path</tt>
|
34
|
+
#
|
35
|
+
# <tt>default_host</tt> hostname including protocol to use in all sitemap links
|
36
|
+
# e.g. http://en.google.ca
|
37
|
+
def initialize(public_path = nil, sitemaps_path = nil, default_host = nil)
|
38
|
+
public_path = File.join(::Rails.root, 'public/') if public_path.nil?
|
39
|
+
self.default_host = default_host
|
40
|
+
self.public_path = public_path
|
41
|
+
self.sitemaps_path = sitemaps_path
|
42
|
+
|
43
|
+
# Completed sitemaps
|
44
|
+
self.sitemaps = []
|
53
45
|
end
|
54
46
|
|
55
|
-
|
56
|
-
|
57
|
-
write_upcoming
|
58
|
-
write_index
|
47
|
+
# Return the total number of links across all sitemaps in <tt>sitemaps</tt>.
#
# Returns 0 when <tt>sitemaps</tt> is empty; an unseeded <tt>inject(:+)</tt>
# over an empty Array would return nil.
def link_count
  self.sitemaps.inject(0) { |total, sitemap| total + sitemap.link_count }
end
|
60
50
|
|
61
|
-
#
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
filename = File.join(Rails.root, "public", file)
|
68
|
-
write_file(filename, buffer)
|
69
|
-
show_progress("Sitemap", filename, buffer) if verbose
|
70
|
-
if slice_index==0
|
71
|
-
links.clear
|
72
|
-
else
|
73
|
-
links.slice! slice_index, links.size
|
74
|
-
end
|
51
|
+
# Called within the user's eval'ed sitemap config file. Add links to sitemap files
|
52
|
+
# passing a block.
|
53
|
+
#
|
54
|
+
# TODO: Refactor. The call chain is confusing and convoluted here.
|
55
|
+
def add_links
|
56
|
+
raise ArgumentError, "Default hostname not set" if default_host.blank?
|
75
57
|
|
76
|
-
|
77
|
-
|
58
|
+
# I'd rather have these calls in <tt>create</tt> but we have to wait
|
59
|
+
# for <tt>default_host</tt> to be set by the user's sitemap config
|
60
|
+
new_sitemap
|
61
|
+
add_default_links
|
78
62
|
|
79
|
-
|
80
|
-
def write_index
|
81
|
-
buffer = ""
|
82
|
-
xml = Builder::XmlMarkup.new(:target => buffer)
|
83
|
-
eval(SitemapGenerator.templates.sitemap_index, binding)
|
84
|
-
filename = File.join(Rails.root, "public", index_file)
|
85
|
-
write_file(filename, buffer)
|
86
|
-
show_progress("Sitemap Index", filename, buffer) if verbose
|
87
|
-
links.clear
|
88
|
-
sitemaps.clear
|
63
|
+
yield Mapper.new(self)
|
89
64
|
end
|
90
65
|
|
91
|
-
#
|
92
|
-
|
93
|
-
|
66
|
+
# Called from Mapper.
|
67
|
+
#
|
68
|
+
# Add a link to the current sitemap.
|
69
|
+
def add_link(link)
|
70
|
+
unless self.sitemap << link
|
71
|
+
new_sitemap
|
72
|
+
self.sitemap << link
|
73
|
+
end
|
94
74
|
end
|
95
75
|
|
96
|
-
#
|
97
|
-
|
98
|
-
|
99
|
-
|
76
|
+
# Add the current sitemap to the <tt>sitemaps</tt> Array and
|
77
|
+
# start a new sitemap.
|
78
|
+
#
|
79
|
+
# If the current sitemap is nil or empty it is not added.
|
80
|
+
def new_sitemap
|
81
|
+
unless self.sitemap_index
|
82
|
+
self.sitemap_index = SitemapGenerator::Builder::SitemapIndexFile.new(public_path, sitemap_index_path, default_host)
|
83
|
+
end
|
100
84
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
end
|
85
|
+
unless self.sitemap
|
86
|
+
self.sitemap = SitemapGenerator::Builder::SitemapFile.new(public_path, new_sitemap_path, default_host)
|
87
|
+
end
|
105
88
|
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
89
|
+
# Mark the sitemap as complete and add it to the sitemap index
|
90
|
+
unless self.sitemap.empty?
|
91
|
+
self.sitemap.finalize!
|
92
|
+
self.sitemap_index << Link.generate(self.sitemap)
|
93
|
+
self.sitemaps << self.sitemap
|
94
|
+
show_progress(self.sitemap) if verbose
|
110
95
|
|
111
|
-
|
112
|
-
|
113
|
-
!first_sitemap?
|
96
|
+
self.sitemap = SitemapGenerator::Builder::SitemapFile.new(public_path, new_sitemap_path, default_host)
|
97
|
+
end
|
114
98
|
end
|
115
99
|
|
116
|
-
#
|
117
|
-
def
|
118
|
-
|
100
|
+
# Report progress line.
|
101
|
+
def show_progress(sitemap)
|
102
|
+
uncompressed_size = number_to_human_size(sitemap.filesize)
|
103
|
+
compressed_size = number_to_human_size(File.size?(sitemap.full_path))
|
104
|
+
puts "+ #{sitemap.sitemap_path} #{sitemap.link_count} links / #{uncompressed_size} / #{compressed_size} gzipped"
|
119
105
|
end
|
120
106
|
|
121
|
-
#
|
122
|
-
def
|
123
|
-
|
107
|
+
# Finalize all sitemap files
|
108
|
+
def finalize!
|
109
|
+
new_sitemap
|
110
|
+
self.sitemap_index.finalize!
|
124
111
|
end
|
125
112
|
|
126
|
-
#
|
127
|
-
|
128
|
-
|
129
|
-
|
113
|
+
# Ping search engines.
|
114
|
+
#
|
115
|
+
# @see http://en.wikipedia.org/wiki/Sitemap_index
|
116
|
+
def ping_search_engines
|
117
|
+
require 'open-uri'
|
118
|
+
|
119
|
+
sitemap_index_url = CGI.escape(self.sitemap_index.full_url)
|
120
|
+
search_engines = {
|
121
|
+
:google => "http://www.google.com/webmasters/sitemaps/ping?sitemap=#{sitemap_index_url}",
|
122
|
+
:yahoo => "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=#{sitemap_index_url}&appid=#{yahoo_app_id}",
|
123
|
+
:ask => "http://submissions.ask.com/ping?sitemap=#{sitemap_index_url}",
|
124
|
+
:bing => "http://www.bing.com/webmaster/ping.aspx?siteMap=#{sitemap_index_url}",
|
125
|
+
:sitemap_writer => "http://www.sitemapwriter.com/notify.php?crawler=all&url=#{sitemap_index_url}"
|
126
|
+
}
|
127
|
+
|
128
|
+
puts "\n" if verbose
|
129
|
+
search_engines.each do |engine, link|
|
130
|
+
next if engine == :yahoo && !self.yahoo_app_id
|
131
|
+
begin
|
132
|
+
open(link)
|
133
|
+
puts "Successful ping of #{engine.to_s.titleize}" if verbose
|
134
|
+
rescue Timeout::Error, StandardError => e
|
135
|
+
puts "Ping failed for #{engine.to_s.titleize}: #{e.inspect} (URL #{link})" if verbose
|
136
|
+
end
|
137
|
+
end
|
130
138
|
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
139
|
+
if !self.yahoo_app_id && verbose
|
140
|
+
puts "\n"
|
141
|
+
puts <<-END.gsub(/^\s+/, '')
|
142
|
+
To ping Yahoo you require a Yahoo AppID. Add it to your config/sitemap.rb with:
|
135
143
|
|
136
|
-
|
137
|
-
def enough_links?
|
138
|
-
!more_links?
|
139
|
-
end
|
144
|
+
SitemapGenerator::Sitemap.yahoo_app_id = "my_app_id"
|
140
145
|
|
141
|
-
|
142
|
-
|
143
|
-
|
146
|
+
For more information see http://developer.yahoo.com/search/siteexplorer/V1/updateNotification.html
|
147
|
+
END
|
148
|
+
end
|
144
149
|
end
|
145
150
|
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
151
|
+
protected
|
152
|
+
|
153
|
+
def add_default_links
|
154
|
+
self.sitemap << Link.generate('/', :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
|
155
|
+
self.sitemap << Link.generate(self.sitemap_index, :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
|
150
156
|
end
|
151
157
|
|
152
|
-
#
|
153
|
-
|
154
|
-
|
158
|
+
# Return the current sitemap filename with index.
|
159
|
+
#
|
160
|
+
# The index depends on the length of the <tt>sitemaps</tt> array.
|
161
|
+
def new_sitemap_path
|
162
|
+
File.join(self.sitemaps_path || '', "sitemap#{self.sitemaps.length + 1}.xml.gz")
|
155
163
|
end
|
156
164
|
|
157
|
-
#
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
puts "Sitemap stats: #{number_with_delimiter(SitemapGenerator::Sitemap.link_count)} links, " + ("%dm%02ds" % (stop_time - start_time).divmod(60)) if verbose
|
165
|
+
# Return the current sitemap index filename.
|
166
|
+
#
|
167
|
+
# At the moment we only support one index file which can link to
|
168
|
+
# up to 50,000 sitemap files.
|
169
|
+
def sitemap_index_path
|
170
|
+
File.join(self.sitemaps_path || '', 'sitemap_index.xml.gz')
|
164
171
|
end
|
165
172
|
end
|
166
|
-
end
|
173
|
+
end
|
@@ -7,8 +7,6 @@ module SitemapGenerator
|
|
7
7
|
# Define an accessor method for each template file.
|
8
8
|
class Templates
|
9
9
|
FILES = {
|
10
|
-
:sitemap_index => 'sitemap_index.builder',
|
11
|
-
:sitemap_xml => 'xml_sitemap.builder',
|
12
10
|
:sitemap_sample => 'sitemap.rb',
|
13
11
|
}
|
14
12
|
|
@@ -28,7 +26,7 @@ module SitemapGenerator
|
|
28
26
|
|
29
27
|
# Return the full path to a template.
|
30
28
|
#
|
31
|
-
# <tt>file</tt> template symbol e.g. <tt>:
|
29
|
+
# <tt>file</tt> template symbol e.g. <tt>:sitemap_sample</tt>
|
32
30
|
def template_path(template)
|
33
31
|
File.join(@root, 'templates', self.class::FILES[template])
|
34
32
|
end
|
@@ -3,14 +3,14 @@ module SitemapGenerator
|
|
3
3
|
extend self
|
4
4
|
|
5
5
|
# Copy templates/sitemap.rb to config if not there yet.
|
6
|
-
def install_sitemap_rb
|
6
|
+
def install_sitemap_rb(verbose=false)
|
7
7
|
if File.exist?(File.join(RAILS_ROOT, 'config/sitemap.rb'))
|
8
|
-
puts "already exists: config/sitemap.rb, file not copied"
|
8
|
+
puts "already exists: config/sitemap.rb, file not copied" if verbose
|
9
9
|
else
|
10
10
|
FileUtils.cp(
|
11
11
|
SitemapGenerator.templates.template_path(:sitemap_sample),
|
12
12
|
File.join(RAILS_ROOT, 'config/sitemap.rb'))
|
13
|
-
puts "created: config/sitemap.rb"
|
13
|
+
puts "created: config/sitemap.rb" if verbose
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
@@ -25,5 +25,30 @@ module SitemapGenerator
|
|
25
25
|
def clean_files
|
26
26
|
FileUtils.rm(Dir[File.join(RAILS_ROOT, 'public/sitemap*.xml.gz')])
|
27
27
|
end
|
28
|
+
|
29
|
+
# Returns whether this environment is using ActionPack
|
30
|
+
# version 3.0.0 or greater.
|
31
|
+
#
|
32
|
+
# @return [Boolean]
|
33
|
+
def self.rails3?
  # Without ActionPack and its VERSION module loaded there is nothing to inspect.
  return false unless defined?(ActionPack) && defined?(ActionPack::VERSION)

  # Rails 1.2 spells the constant "Major"; other releases use "MAJOR".
  major =
    if defined?(ActionPack::VERSION::MAJOR)
      ActionPack::VERSION::MAJOR
    else
      ActionPack::VERSION::Major
    end
  return false unless major >= 3

  # 3.0.0.beta1 (TINY == "0.beta") is deliberately not counted as Rails 3.
  # This extra check can be removed when beta2 is out.
  beta1 = defined?(ActionPack::VERSION::TINY) &&
          ActionPack::VERSION::TINY == "0.beta"
  !beta1
end
|
28
53
|
end
|
29
54
|
end
|
data/lib/sitemap_generator.rb
CHANGED
@@ -1,19 +1,19 @@
|
|
1
|
+
require 'sitemap_generator/builder'
|
1
2
|
require 'sitemap_generator/mapper'
|
2
3
|
require 'sitemap_generator/link'
|
3
|
-
require 'sitemap_generator/rails_helper'
|
4
|
-
require 'sitemap_generator/helper'
|
5
4
|
require 'sitemap_generator/link_set'
|
6
|
-
require 'sitemap_generator/helper'
|
7
5
|
require 'sitemap_generator/templates'
|
8
6
|
require 'sitemap_generator/utilities'
|
9
|
-
|
10
|
-
require 'sitemap_generator/railtie' if SitemapGenerator::RailsHelper.rails3?
|
7
|
+
require 'sitemap_generator/railtie' if SitemapGenerator::Utilities.rails3?
|
11
8
|
|
12
9
|
module SitemapGenerator
|
13
10
|
silence_warnings do
|
14
11
|
VERSION = File.read(File.dirname(__FILE__) + "/../VERSION").strip
|
15
|
-
|
16
|
-
|
12
|
+
MAX_SITEMAP_FILES = 50_000 # max sitemap links per index file
|
13
|
+
MAX_SITEMAP_LINKS = 50_000 # max links per sitemap
|
14
|
+
MAX_SITEMAP_IMAGES = 1_000 # max images per url
|
15
|
+
MAX_SITEMAP_FILESIZE = 10.megabytes # bytes
|
16
|
+
|
17
17
|
Sitemap = LinkSet.new
|
18
18
|
end
|
19
19
|
|
@@ -1,4 +1,3 @@
|
|
1
|
-
require 'zlib'
|
2
1
|
begin
|
3
2
|
require 'sitemap_generator'
|
4
3
|
rescue LoadError, NameError
|
@@ -8,7 +7,7 @@ end
|
|
8
7
|
namespace :sitemap do
|
9
8
|
desc "Install a default config/sitemap.rb file"
|
10
9
|
task :install do
|
11
|
-
SitemapGenerator::Utilities.install_sitemap_rb
|
10
|
+
SitemapGenerator::Utilities.install_sitemap_rb(verbose)
|
12
11
|
end
|
13
12
|
|
14
13
|
desc "Delete all Sitemap files in public/ directory"
|
@@ -25,19 +24,8 @@ namespace :sitemap do
|
|
25
24
|
task 'refresh:no_ping' => ['sitemap:create']
|
26
25
|
|
27
26
|
task :create => [:environment] do
|
28
|
-
|
29
|
-
|
30
|
-
if SitemapGenerator::RailsHelper.rails3?
|
31
|
-
SitemapGenerator::Sitemap.class_eval do
|
32
|
-
include Rails.application.routes.url_helpers
|
33
|
-
end
|
34
|
-
else
|
35
|
-
require 'action_controller'
|
36
|
-
SitemapGenerator::Sitemap.class_eval do
|
37
|
-
include ActionController::UrlWriter
|
38
|
-
end
|
39
|
-
end
|
40
|
-
SitemapGenerator::Sitemap.create_files
|
27
|
+
SitemapGenerator::Sitemap.verbose = verbose
|
28
|
+
SitemapGenerator::Sitemap.create
|
41
29
|
end
|
42
30
|
end
|
43
31
|
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
- 2
|
8
|
-
- 6
|
9
|
-
version: 0.2.6
|
7
|
+
- 3
|
8
|
+
- 0
|
9
|
+
version: 0.3.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Adam Salter
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-05-
|
18
|
+
date: 2010-05-21 00:00:00 -07:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -44,11 +44,14 @@ files:
|
|
44
44
|
- Rakefile
|
45
45
|
- VERSION
|
46
46
|
- lib/sitemap_generator.rb
|
47
|
-
- lib/sitemap_generator/helper.rb
|
47
|
+
- lib/sitemap_generator/builder.rb
|
48
|
+
- lib/sitemap_generator/builder/helper.rb
|
49
|
+
- lib/sitemap_generator/builder/sitemap_file.rb
|
50
|
+
- lib/sitemap_generator/builder/sitemap_index_file.rb
|
51
|
+
- lib/sitemap_generator/interpreter.rb
|
48
52
|
- lib/sitemap_generator/link.rb
|
49
53
|
- lib/sitemap_generator/link_set.rb
|
50
54
|
- lib/sitemap_generator/mapper.rb
|
51
|
-
- lib/sitemap_generator/rails_helper.rb
|
52
55
|
- lib/sitemap_generator/railtie.rb
|
53
56
|
- lib/sitemap_generator/tasks.rb
|
54
57
|
- lib/sitemap_generator/templates.rb
|
@@ -57,8 +60,6 @@ files:
|
|
57
60
|
- rails/uninstall.rb
|
58
61
|
- tasks/sitemap_generator_tasks.rake
|
59
62
|
- templates/sitemap.rb
|
60
|
-
- templates/sitemap_index.builder
|
61
|
-
- templates/xml_sitemap.builder
|
62
63
|
has_rdoc: true
|
63
64
|
homepage: http://github.com/kjvarga/sitemap_generator
|
64
65
|
licenses: []
|
@@ -1,55 +0,0 @@
|
|
1
|
-
module SitemapGenerator
|
2
|
-
# UrlHelpers are included by the rake tasks. This is not ideal, but should
|
3
|
-
# suffice until things are better organized.
|
4
|
-
module Helper
|
5
|
-
def self.included(base)
|
6
|
-
base.class_eval do
|
7
|
-
def self.default_url_options(options = nil)
|
8
|
-
{ :host => SitemapGenerator::Sitemap.default_host }
|
9
|
-
end
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
|
-
def load_sitemap_rb
|
14
|
-
sitemap_mapper_file = File.join(Rails.root, 'config/sitemap.rb')
|
15
|
-
eval(open(sitemap_mapper_file).read)
|
16
|
-
end
|
17
|
-
|
18
|
-
def url_with_hostname(path)
|
19
|
-
URI.join(SitemapGenerator::Sitemap.default_host, path).to_s
|
20
|
-
end
|
21
|
-
|
22
|
-
def w3c_date(date)
|
23
|
-
date.utc.strftime("%Y-%m-%dT%H:%M:%S+00:00")
|
24
|
-
end
|
25
|
-
|
26
|
-
def ping_search_engines(sitemap_index)
|
27
|
-
require 'open-uri'
|
28
|
-
index_location = CGI.escape(url_with_hostname(sitemap_index))
|
29
|
-
# engines list from http://en.wikipedia.org/wiki/Sitemap_index
|
30
|
-
yahoo_app_id = SitemapGenerator::Sitemap.yahoo_app_id
|
31
|
-
{:google => "http://www.google.com/webmasters/sitemaps/ping?sitemap=#{index_location}",
|
32
|
-
:yahoo => "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=#{index_location}&appid=#{yahoo_app_id}",
|
33
|
-
:ask => "http://submissions.ask.com/ping?sitemap=#{index_location}",
|
34
|
-
:bing => "http://www.bing.com/webmaster/ping.aspx?siteMap=#{index_location}",
|
35
|
-
:sitemap_writer => "http://www.sitemapwriter.com/notify.php?crawler=all&url=#{index_location}"}.each do |engine, link|
|
36
|
-
begin
|
37
|
-
unless SitemapGenerator::Sitemap.yahoo_app_id == false
|
38
|
-
open(link)
|
39
|
-
puts "Successful ping of #{engine.to_s.titleize}" if verbose
|
40
|
-
end
|
41
|
-
rescue Timeout::Error, StandardError => e
|
42
|
-
puts "Ping failed for #{engine.to_s.titleize}: #{e.inspect}" if verbose
|
43
|
-
puts <<-END if engine == :yahoo && verbose
|
44
|
-
Yahoo requires an 'AppID' for more than one ping per "timeframe", you can either:
|
45
|
-
- remove yahoo from the ping list (config/sitemap.rb):
|
46
|
-
SitemapGenerator::Sitemap.yahoo_app_id = false
|
47
|
-
- or add your Yahoo AppID to the generator (config/sitemap.rb):
|
48
|
-
SitemapGenerator::Sitemap.yahoo_app_id = "my_app_id"
|
49
|
-
For more information: http://developer.yahoo.com/search/siteexplorer/V1/updateNotification.html
|
50
|
-
END
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
@@ -1,28 +0,0 @@
|
|
1
|
-
module SitemapGenerator
|
2
|
-
module RailsHelper
|
3
|
-
# Returns whether this environment is using ActionPack
|
4
|
-
# version 3.0.0 or greater.
|
5
|
-
#
|
6
|
-
# @return [Boolean]
|
7
|
-
def self.rails3?
|
8
|
-
# The ActionPack module is always loaded automatically in Rails >= 3
|
9
|
-
return false unless defined?(ActionPack) && defined?(ActionPack::VERSION)
|
10
|
-
|
11
|
-
version =
|
12
|
-
if defined?(ActionPack::VERSION::MAJOR)
|
13
|
-
ActionPack::VERSION::MAJOR
|
14
|
-
else
|
15
|
-
# Rails 1.2
|
16
|
-
ActionPack::VERSION::Major
|
17
|
-
end
|
18
|
-
|
19
|
-
# 3.0.0.beta1 acts more like ActionPack 2
|
20
|
-
# for purposes of this method
|
21
|
-
# (checking whether block helpers require = or -).
|
22
|
-
# This extra check can be removed when beta2 is out.
|
23
|
-
version >= 3 &&
|
24
|
-
!(defined?(ActionPack::VERSION::TINY) &&
|
25
|
-
ActionPack::VERSION::TINY == "0.beta")
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
@@ -1,23 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
# <?xml version="1.0" encoding="UTF-8"?>
|
3
|
-
# <sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
4
|
-
# <sitemap>
|
5
|
-
# <loc>http://www.example.com/sitemap1.xml.gz</loc>
|
6
|
-
# <lastmod>2004-10-01T18:23:17+00:00</lastmod>
|
7
|
-
# </sitemap>
|
8
|
-
# <sitemap>
|
9
|
-
# <loc>http://www.example.com/sitemap2.xml.gz</loc>
|
10
|
-
# <lastmod>2005-01-01</lastmod>
|
11
|
-
# </sitemap>
|
12
|
-
# </sitemapindex>
|
13
|
-
|
14
|
-
xml.instruct!
|
15
|
-
xml.sitemapindex "xmlns" => "http://www.sitemaps.org/schemas/sitemap/0.9" do
|
16
|
-
sitemap_files.each do |file|
|
17
|
-
xml.sitemap do
|
18
|
-
xml.loc url_with_hostname(File.basename(file))
|
19
|
-
xml.lastmod w3c_date(File.mtime(file))
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
@@ -1,38 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
xml.instruct!
|
3
|
-
xml.urlset "xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance",
|
4
|
-
"xsi:schemaLocation" => "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd",
|
5
|
-
"xmlns:image" => "http://www.google.com/schemas/sitemap-image/1.1",
|
6
|
-
"xmlns" => "http://www.sitemaps.org/schemas/sitemap/0.9" do
|
7
|
-
|
8
|
-
links.each_with_index do |link,index|
|
9
|
-
buffer_url = ""
|
10
|
-
url = Builder::XmlMarkup.new(:target=>buffer_url)
|
11
|
-
url.url do
|
12
|
-
url.loc link[:loc]
|
13
|
-
url.lastmod w3c_date(link[:lastmod]) if link[:lastmod]
|
14
|
-
url.changefreq link[:changefreq] if link[:changefreq]
|
15
|
-
url.priority link[:priority] if link[:priority]
|
16
|
-
|
17
|
-
unless link[:images].blank?
|
18
|
-
link[:images].each do |image|
|
19
|
-
url.image:image do
|
20
|
-
url.image :loc, image[:loc]
|
21
|
-
url.image :caption, image[:caption] if image[:caption]
|
22
|
-
url.image :geo_location, image[:geo_location] if image[:geo_location]
|
23
|
-
url.image :title, image[:title] if image[:title]
|
24
|
-
url.image :license, image[:license] if image[:license]
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
if (buffer+buffer_url).size < 10.megabytes
|
31
|
-
xml << buffer_url
|
32
|
-
else
|
33
|
-
slice_index = index
|
34
|
-
break
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|