sitemap_generator 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +7 -0
- data/VERSION +1 -1
- data/lib/sitemap_generator.rb +4 -2
- data/lib/sitemap_generator/builder.rb +2 -1
- data/lib/sitemap_generator/builder/sitemap_file.rb +48 -68
- data/lib/sitemap_generator/builder/sitemap_index_file.rb +20 -6
- data/lib/sitemap_generator/builder/sitemap_index_url.rb +28 -0
- data/lib/sitemap_generator/builder/sitemap_url.rb +97 -0
- data/lib/sitemap_generator/link_set.rb +45 -66
- metadata +7 -9
- data/lib/sitemap_generator/builder/helper.rb +0 -10
- data/lib/sitemap_generator/builder/sitemap_file.rb.orig +0 -133
- data/lib/sitemap_generator/link.rb +0 -37
- data/lib/sitemap_generator/mapper.rb +0 -16
data/Rakefile
CHANGED
@@ -49,12 +49,16 @@ end
|
|
49
49
|
#
|
50
50
|
task :default => :test
|
51
51
|
|
52
|
+
desc "Run specs"
|
52
53
|
namespace :test do
|
53
54
|
#desc "Test as a gem, plugin and Rails 3 gem"
|
54
55
|
#task :all => ['test:gem', 'test:plugin']
|
55
56
|
|
57
|
+
desc "Run specs on a gem install"
|
56
58
|
task :gem => ['test:prepare:gem', 'multi_spec']
|
59
|
+
desc "Run specs on a plugin install"
|
57
60
|
task :plugin => ['test:prepare:plugin', 'multi_spec']
|
61
|
+
desc "Run specs on a rails3 install"
|
58
62
|
task :rails3 => ['test:prepare:rails3', 'multi_spec']
|
59
63
|
|
60
64
|
task :multi_spec do
|
@@ -63,18 +67,21 @@ namespace :test do
|
|
63
67
|
end
|
64
68
|
|
65
69
|
namespace :prepare do
|
70
|
+
desc "Prepare the gem install for testing"
|
66
71
|
task :gem do
|
67
72
|
ENV["SITEMAP_RAILS"] = 'gem'
|
68
73
|
Helpers.prepare_path(Helpers.local_path('spec/mock_app_gem/vendor/gems/sitemap_generator-1.2.3'))
|
69
74
|
rm_rf(Helpers.local_path('spec/mock_app_gem/public/sitemap*'))
|
70
75
|
end
|
71
76
|
|
77
|
+
desc "Prepare the plugin install for testing"
|
72
78
|
task :plugin do
|
73
79
|
ENV["SITEMAP_RAILS"] = 'plugin'
|
74
80
|
Helpers.prepare_path(Helpers.local_path('spec/mock_app_plugin/vendor/plugins/sitemap_generator-1.2.3'))
|
75
81
|
rm_rf(Helpers.local_path('spec/mock_app_plugin/public/sitemap*'))
|
76
82
|
end
|
77
83
|
|
84
|
+
desc "Prepare the rails3 install for testing"
|
78
85
|
task :rails3 do
|
79
86
|
ENV["SITEMAP_RAILS"] = 'rails3'
|
80
87
|
rm_rf(Helpers.local_path('spec/mock_rails3_gem/public/sitemap*'))
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.1.1
|
1
|
+
1.2.0
|
data/lib/sitemap_generator.rb
CHANGED
@@ -1,6 +1,4 @@
|
|
1
1
|
require 'sitemap_generator/builder'
|
2
|
-
require 'sitemap_generator/mapper'
|
3
|
-
require 'sitemap_generator/link'
|
4
2
|
require 'sitemap_generator/link_set'
|
5
3
|
require 'sitemap_generator/templates'
|
6
4
|
require 'sitemap_generator/utilities'
|
@@ -9,6 +7,10 @@ require 'sitemap_generator/railtie' if SitemapGenerator::Utilities.rails3?
|
|
9
7
|
require 'active_support/core_ext/numeric'
|
10
8
|
|
11
9
|
module SitemapGenerator
|
10
|
+
class SitemapError <StandardError; end
|
11
|
+
class SitemapFullError < SitemapError; end
|
12
|
+
class SitemapFinalizedError < SitemapError; end
|
13
|
+
|
12
14
|
silence_warnings do
|
13
15
|
VERSION = File.read(File.dirname(__FILE__) + "/../VERSION").strip
|
14
16
|
MAX_SITEMAP_FILES = 50_000 # max sitemap links per index file
|
@@ -1,6 +1,7 @@
|
|
1
|
-
require 'sitemap_generator/builder/helper'
|
2
1
|
require 'sitemap_generator/builder/sitemap_file'
|
3
2
|
require 'sitemap_generator/builder/sitemap_index_file'
|
3
|
+
require 'sitemap_generator/builder/sitemap_url'
|
4
|
+
require 'sitemap_generator/builder/sitemap_index_url'
|
4
5
|
|
5
6
|
module SitemapGenerator
|
6
7
|
module Builder
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require 'sitemap_generator/builder/helper'
|
2
1
|
require 'builder'
|
3
2
|
require 'zlib'
|
3
|
+
require 'action_view'
|
4
4
|
|
5
5
|
module SitemapGenerator
|
6
6
|
module Builder
|
@@ -8,13 +8,12 @@ module SitemapGenerator
|
|
8
8
|
# General Usage:
|
9
9
|
#
|
10
10
|
# sitemap = SitemapFile.new('public/', 'sitemap.xml', 'http://example.com')
|
11
|
-
#
|
12
|
-
# sitemap.
|
13
|
-
#
|
11
|
+
# sitemap.add('/', { ... }) <- add a link to the sitemap
|
12
|
+
# sitemap.finalize! <- creates a new sitemap file in directory public/
|
13
|
+
# and freezes the object to protect it from further modification
|
14
14
|
#
|
15
15
|
class SitemapFile
|
16
|
-
include
|
17
|
-
|
16
|
+
include ActionView::Helpers::NumberHelper
|
18
17
|
attr_accessor :sitemap_path, :public_path, :filesize, :link_count, :hostname
|
19
18
|
|
20
19
|
# <tt>public_path</tt> full path of the directory to write sitemaps in.
|
@@ -25,7 +24,7 @@ module SitemapGenerator
|
|
25
24
|
#
|
26
25
|
# <tt>hostname</tt> hostname including protocol to use in all links
|
27
26
|
# e.g. http://en.google.ca
|
28
|
-
def initialize(public_path, sitemap_path, hostname)
|
27
|
+
def initialize(public_path, sitemap_path, hostname='http://example.com')
|
29
28
|
self.sitemap_path = sitemap_path
|
30
29
|
self.public_path = public_path
|
31
30
|
self.hostname = hostname
|
@@ -70,79 +69,47 @@ module SitemapGenerator
|
|
70
69
|
(self.filesize + bytes) < SitemapGenerator::MAX_SITEMAP_FILESIZE && self.link_count < SitemapGenerator::MAX_SITEMAP_LINKS
|
71
70
|
end
|
72
71
|
|
73
|
-
# Add a link to the sitemap file
|
74
|
-
#
|
72
|
+
# Add a link to the sitemap file.
|
73
|
+
#
|
74
|
+
# If a link cannot be added, for example if the file is too large or the link
|
75
|
+
# limit has been reached, a SitemapGenerator::SitemapFullError exception is raised.
|
75
76
|
#
|
76
|
-
# If
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
77
|
+
# If the Sitemap has already been finalized a SitemapGenerator::SitemapFinalizedError
|
78
|
+
# exception is raised.
|
79
|
+
#
|
80
|
+
# Call with:
|
81
|
+
# sitemap_url - a SitemapUrl instance
|
82
|
+
# sitemap, options - a Sitemap instance and options hash
|
83
|
+
# path, options - a path for the URL and options hash
|
84
|
+
def add(link, options={})
|
85
|
+
xml = if link.is_a?(SitemapGenerator::Builder::SitemapUrl)
|
86
|
+
link.to_xml
|
87
|
+
else
|
88
|
+
SitemapGenerator::Builder::SitemapUrl.new(link, options).to_xml
|
82
89
|
end
|
83
90
|
|
91
|
+
if self.finalized?
|
92
|
+
raise SitemapGenerator::SitemapFinalized
|
93
|
+
elsif !file_can_fit?(bytesize(xml))
|
94
|
+
raise SitemapGenerator::SitemapFull
|
95
|
+
end
|
96
|
+
|
97
|
+
# Add the XML
|
84
98
|
@xml_content << xml
|
85
99
|
self.filesize += bytesize(xml)
|
86
100
|
self.link_count += 1
|
87
101
|
true
|
88
102
|
end
|
89
|
-
alias_method :<<, :add_link
|
90
|
-
|
91
|
-
# Return XML as a String
|
92
|
-
def build_xml(builder, link)
|
93
|
-
builder.url do
|
94
|
-
builder.loc link[:loc]
|
95
|
-
builder.lastmod w3c_date(link[:lastmod]) if link[:lastmod]
|
96
|
-
builder.changefreq link[:changefreq] if link[:changefreq]
|
97
|
-
builder.priority link[:priority] if link[:priority]
|
98
|
-
|
99
|
-
unless link[:images].blank?
|
100
|
-
link[:images].each do |image|
|
101
|
-
builder.image:image do
|
102
|
-
builder.image :loc, image[:loc]
|
103
|
-
builder.image :caption, image[:caption] if image[:caption]
|
104
|
-
builder.image :geo_location, image[:geo_location] if image[:geo_location]
|
105
|
-
builder.image :title, image[:title] if image[:title]
|
106
|
-
builder.image :license, image[:license] if image[:license]
|
107
|
-
end
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
|
-
unless link[:video].blank?
|
112
|
-
video = link[:video]
|
113
|
-
builder.video :video do
|
114
|
-
# required elements
|
115
|
-
builder.video :content_loc, video[:content_loc] if video[:content_loc]
|
116
|
-
if video[:player_loc]
|
117
|
-
builder.video :player_loc, video[:player_loc], :allow_embed => (video[:allow_embed] ? 'yes' : 'no'), :autoplay => video[:autoplay]
|
118
|
-
end
|
119
|
-
builder.video :thumbnail_loc, video[:thumbnail_loc]
|
120
|
-
builder.video :title, video[:title]
|
121
|
-
builder.video :description, video[:description]
|
122
|
-
|
123
|
-
builder.video :rating, video[:rating] if video[:rating]
|
124
|
-
builder.video :view_count, video[:view_count] if video[:view_count]
|
125
|
-
builder.video :publication_date, video[:publication_date] if video[:publication_date]
|
126
|
-
builder.video :expiration_date, video[:expiration_date] if video[:expiration_date]
|
127
|
-
builder.video :duration, video[:duration] if video[:duration]
|
128
|
-
builder.video :family_friendly, (video[:family_friendly] ? 'yes' : 'no') if video[:family_friendly]
|
129
|
-
builder.video :duration, video[:duration] if video[:duration]
|
130
|
-
video[:tags].each {|tag| builder.video :tag, tag } if video[:tags]
|
131
|
-
builder.video :tag, video[:tag] if video[:tag]
|
132
|
-
builder.video :category, video[:category] if video[:category]
|
133
|
-
builder.video :gallery_loc, video[:gallery_loc] if video[:gallery_loc]
|
134
|
-
end
|
135
|
-
end
|
136
|
-
end
|
137
|
-
builder << ''
|
138
|
-
end
|
139
103
|
|
140
|
-
#
|
104
|
+
# Write out the Sitemap file and freeze this object.
|
141
105
|
#
|
142
106
|
# All the xml content in the instance is cleared, but attributes like
|
143
107
|
# <tt>filesize</tt> are still available.
|
108
|
+
#
|
109
|
+
# A SitemapGenerator::SitemapFinalizedError exception is raised if the Sitemap
|
110
|
+
# has already been finalized
|
144
111
|
def finalize!
|
145
|
-
|
112
|
+
raise SitemapGenerator::SitemapFinalized if self.finalized?
|
146
113
|
|
147
114
|
open(self.full_path, 'wb') do |file|
|
148
115
|
gz = Zlib::GzipWriter.new(file)
|
@@ -155,7 +122,20 @@ module SitemapGenerator
|
|
155
122
|
self.freeze
|
156
123
|
end
|
157
124
|
|
158
|
-
|
125
|
+
def finalized?
|
126
|
+
return self.frozen?
|
127
|
+
end
|
128
|
+
|
129
|
+
# Return a summary string
|
130
|
+
def summary
|
131
|
+
uncompressed_size = number_to_human_size(filesize)
|
132
|
+
compressed_size = number_to_human_size(File.size?(full_path))
|
133
|
+
"+ #{'%-21s' % self.sitemap_path} #{'%13s' % self.link_count} links / #{'%10s' % uncompressed_size} / #{'%10s' % compressed_size} gzipped"
|
134
|
+
end
|
135
|
+
|
136
|
+
protected
|
137
|
+
|
138
|
+
# Return the bytesize length of the string. Ruby 1.8.6 compatible.
|
159
139
|
def bytesize(string)
|
160
140
|
string.respond_to?(:bytesize) ? string.bytesize : string.length
|
161
141
|
end
|
@@ -1,10 +1,12 @@
|
|
1
1
|
module SitemapGenerator
|
2
2
|
module Builder
|
3
3
|
class SitemapIndexFile < SitemapFile
|
4
|
+
attr_accessor :sitemaps
|
4
5
|
|
5
6
|
def initialize(*args)
|
6
7
|
super(*args)
|
7
8
|
|
9
|
+
self.sitemaps = []
|
8
10
|
@xml_content = '' # XML urlset content
|
9
11
|
@xml_wrapper_start = <<-HTML
|
10
12
|
<?xml version="1.0" encoding="UTF-8"?>
|
@@ -20,13 +22,25 @@ module SitemapGenerator
|
|
20
22
|
self.filesize = bytesize(@xml_wrapper_start) + bytesize(@xml_wrapper_end)
|
21
23
|
end
|
22
24
|
|
23
|
-
#
|
24
|
-
def
|
25
|
-
|
26
|
-
|
27
|
-
|
25
|
+
# Finalize sitemaps as they are added to the index
|
26
|
+
def add(link, options={})
|
27
|
+
if link.is_a?(SitemapFile)
|
28
|
+
self.sitemaps << link
|
29
|
+
link.finalize!
|
28
30
|
end
|
29
|
-
|
31
|
+
super(SitemapGenerator::Builder::SitemapIndexUrl.new(link, options))
|
32
|
+
end
|
33
|
+
|
34
|
+
# Return the total number of links in all sitemaps referenced by this index file
|
35
|
+
def total_link_count
|
36
|
+
self.sitemaps.inject(0) { |link_count_sum, sitemap| link_count_sum + sitemap.link_count }
|
37
|
+
end
|
38
|
+
|
39
|
+
# Return a summary string
|
40
|
+
def summary
|
41
|
+
uncompressed_size = number_to_human_size(filesize)
|
42
|
+
compressed_size = number_to_human_size(File.size?(full_path))
|
43
|
+
"+ #{'%-21s' % self.sitemap_path} #{'%10s' % self.link_count} sitemaps / #{'%10s' % uncompressed_size} / #{'%10s' % compressed_size} gzipped"
|
30
44
|
end
|
31
45
|
end
|
32
46
|
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'builder'
|
2
|
+
|
3
|
+
module SitemapGenerator
|
4
|
+
module Builder
|
5
|
+
class SitemapIndexUrl < SitemapUrl
|
6
|
+
|
7
|
+
def initialize(path, options={})
|
8
|
+
if path.is_a?(SitemapGenerator::Builder::SitemapIndexFile)
|
9
|
+
options.reverse_merge!(:host => path.hostname, :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
|
10
|
+
path = path.sitemap_path
|
11
|
+
super(path, options)
|
12
|
+
else
|
13
|
+
super
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
# Return the URL as XML
|
18
|
+
def to_xml(builder=nil)
|
19
|
+
builder = ::Builder::XmlMarkup.new if builder.nil?
|
20
|
+
builder.sitemap do
|
21
|
+
builder.loc self[:loc]
|
22
|
+
builder.lastmod w3c_date(self[:lastmod]) if self[:lastmod]
|
23
|
+
end
|
24
|
+
builder << '' # force to string
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
require 'builder'
|
2
|
+
require 'uri'
|
3
|
+
|
4
|
+
module SitemapGenerator
|
5
|
+
module Builder
|
6
|
+
class SitemapUrl < Hash
|
7
|
+
|
8
|
+
# Call with:
|
9
|
+
# sitemap - a Sitemap instance, or
|
10
|
+
# path, options - a path for the URL and options hash
|
11
|
+
def initialize(path, options={})
|
12
|
+
if path.is_a?(SitemapGenerator::Builder::SitemapFile)
|
13
|
+
options.reverse_merge!(:host => path.hostname, :lastmod => path.lastmod)
|
14
|
+
path = path.sitemap_path
|
15
|
+
end
|
16
|
+
|
17
|
+
options.assert_valid_keys(:priority, :changefreq, :lastmod, :host, :images, :video)
|
18
|
+
options.reverse_merge!(:priority => 0.5, :changefreq => 'weekly', :lastmod => Time.now, :host => Sitemap.default_host, :images => [])
|
19
|
+
self.merge!(
|
20
|
+
:path => path,
|
21
|
+
:priority => options[:priority],
|
22
|
+
:changefreq => options[:changefreq],
|
23
|
+
:lastmod => options[:lastmod],
|
24
|
+
:host => options[:host],
|
25
|
+
:loc => URI.join(options[:host], path).to_s,
|
26
|
+
:images => prepare_images(options[:images], options[:host]),
|
27
|
+
:video => options[:video]
|
28
|
+
)
|
29
|
+
end
|
30
|
+
|
31
|
+
# Return the URL as XML
|
32
|
+
def to_xml(builder=nil)
|
33
|
+
builder = ::Builder::XmlMarkup.new if builder.nil?
|
34
|
+
builder.url do
|
35
|
+
builder.loc self[:loc]
|
36
|
+
builder.lastmod w3c_date(self[:lastmod]) if self[:lastmod]
|
37
|
+
builder.changefreq self[:changefreq] if self[:changefreq]
|
38
|
+
builder.priority self[:priority] if self[:priority]
|
39
|
+
|
40
|
+
unless self[:images].blank?
|
41
|
+
self[:images].each do |image|
|
42
|
+
builder.image:image do
|
43
|
+
builder.image :loc, image[:loc]
|
44
|
+
builder.image :caption, image[:caption] if image[:caption]
|
45
|
+
builder.image :geo_location, image[:geo_location] if image[:geo_location]
|
46
|
+
builder.image :title, image[:title] if image[:title]
|
47
|
+
builder.image :license, image[:license] if image[:license]
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
unless self[:video].blank?
|
53
|
+
video = self[:video]
|
54
|
+
builder.video :video do
|
55
|
+
builder.video :content_loc, video[:content_loc] if video[:content_loc]
|
56
|
+
if video[:player_loc]
|
57
|
+
builder.video :player_loc, video[:player_loc], :allow_embed => (video[:allow_embed] ? 'yes' : 'no'), :autoplay => video[:autoplay]
|
58
|
+
end
|
59
|
+
builder.video :thumbnail_loc, video[:thumbnail_loc]
|
60
|
+
builder.video :title, video[:title]
|
61
|
+
builder.video :description, video[:description]
|
62
|
+
|
63
|
+
builder.video :rating, video[:rating] if video[:rating]
|
64
|
+
builder.video :view_count, video[:view_count] if video[:view_count]
|
65
|
+
builder.video :publication_date, video[:publication_date] if video[:publication_date]
|
66
|
+
builder.video :expiration_date, video[:expiration_date] if video[:expiration_date]
|
67
|
+
builder.video :duration, video[:duration] if video[:duration]
|
68
|
+
builder.video :family_friendly, (video[:family_friendly] ? 'yes' : 'no') if video[:family_friendly]
|
69
|
+
builder.video :duration, video[:duration] if video[:duration]
|
70
|
+
video[:tags].each {|tag| builder.video :tag, tag } if video[:tags]
|
71
|
+
builder.video :tag, video[:tag] if video[:tag]
|
72
|
+
builder.video :category, video[:category] if video[:category]
|
73
|
+
builder.video :gallery_loc, video[:gallery_loc] if video[:gallery_loc]
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
builder << '' # Force to string
|
78
|
+
end
|
79
|
+
|
80
|
+
protected
|
81
|
+
|
82
|
+
# Return an Array of image option Hashes suitable to be parsed by SitemapGenerator::Builder::SitemapFile
|
83
|
+
def prepare_images(images, host)
|
84
|
+
images.delete_if { |key,value| key[:loc] == nil }
|
85
|
+
images.each do |r|
|
86
|
+
r.assert_valid_keys(:loc, :caption, :geo_location, :title, :license)
|
87
|
+
r[:loc] = URI.join(host, r[:loc]).to_s
|
88
|
+
end
|
89
|
+
images[0..(SitemapGenerator::MAX_SITEMAP_IMAGES-1)]
|
90
|
+
end
|
91
|
+
|
92
|
+
def w3c_date(date)
|
93
|
+
date.utc.strftime("%Y-%m-%dT%H:%M:%S+00:00")
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
@@ -8,26 +8,43 @@ module SitemapGenerator
|
|
8
8
|
include ActionView::Helpers::NumberHelper # for number_with_delimiter
|
9
9
|
|
10
10
|
attr_accessor :default_host, :public_path, :sitemaps_path
|
11
|
-
attr_accessor :sitemap, :
|
11
|
+
attr_accessor :sitemap, :sitemap_index
|
12
12
|
attr_accessor :verbose, :yahoo_app_id
|
13
13
|
|
14
14
|
# Evaluate the sitemap config file and write all sitemaps.
|
15
15
|
#
|
16
|
-
#
|
16
|
+
# The Sitemap Interpreter includes the URL helpers and API methods
|
17
|
+
# that the block argument to `add_links` is evaluated within.
|
18
|
+
#
|
19
|
+
# TODO: Refactor so that we can have multiple instances
|
17
20
|
# of LinkSet.
|
18
21
|
def create
|
19
22
|
require 'sitemap_generator/interpreter'
|
20
23
|
|
21
24
|
self.public_path = File.join(::Rails.root, 'public/') if self.public_path.nil?
|
22
25
|
|
26
|
+
# Default host is not set yet. Set it on these objects when `add_links` is called
|
27
|
+
self.sitemap_index = SitemapGenerator::Builder::SitemapIndexFile.new(public_path, sitemap_index_path)
|
28
|
+
self.sitemap = SitemapGenerator::Builder::SitemapFile.new(public_path, new_sitemap_path)
|
29
|
+
|
23
30
|
start_time = Time.now
|
24
31
|
SitemapGenerator::Interpreter.run
|
25
|
-
|
32
|
+
unless self.sitemap.finalized?
|
33
|
+
self.sitemap_index.add(self.sitemap)
|
34
|
+
puts self.sitemap.summary if verbose
|
35
|
+
end
|
36
|
+
self.sitemap_index.finalize!
|
26
37
|
end_time = Time.now
|
27
|
-
|
28
|
-
|
38
|
+
|
39
|
+
if verbose
|
40
|
+
puts self.sitemap_index.summary
|
41
|
+
puts "\nSitemap stats: #{number_with_delimiter(self.sitemap_index.total_link_count)} links / #{self.sitemap_index.sitemaps.size} sitemaps / " +
|
42
|
+
("%dm%02ds" % (end_time - start_time).divmod(60))
|
43
|
+
end
|
29
44
|
end
|
30
45
|
|
46
|
+
# Constructor
|
47
|
+
#
|
31
48
|
# <tt>public_path</tt> (optional) full path to the directory to write sitemaps in.
|
32
49
|
# Defaults to your Rails <tt>public/</tt> directory.
|
33
50
|
#
|
@@ -40,15 +57,10 @@ module SitemapGenerator
|
|
40
57
|
self.default_host = default_host
|
41
58
|
self.public_path = public_path
|
42
59
|
self.sitemaps_path = sitemaps_path
|
43
|
-
|
44
|
-
# Completed sitemaps
|
45
|
-
self.sitemaps = []
|
46
|
-
end
|
47
|
-
|
48
|
-
def link_count
|
49
|
-
self.sitemaps.inject(0) { |link_count_sum, sitemap| link_count_sum + sitemap.link_count }
|
50
60
|
end
|
51
61
|
|
62
|
+
# Entry point for users.
|
63
|
+
#
|
52
64
|
# Called within the user's eval'ed sitemap config file. Add links to sitemap files
|
53
65
|
# passing a block.
|
54
66
|
#
|
@@ -56,61 +68,29 @@ module SitemapGenerator
|
|
56
68
|
def add_links
|
57
69
|
raise ArgumentError, "Default hostname not set" if default_host.blank?
|
58
70
|
|
59
|
-
#
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
yield Mapper.new(self)
|
65
|
-
end
|
71
|
+
# Set default host on the sitemap objects and seed the sitemap with the default links
|
72
|
+
self.sitemap.hostname = self.sitemap_index.hostname = default_host
|
73
|
+
self.sitemap.add('/', :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
|
74
|
+
self.sitemap.add(self.sitemap_index, :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
|
66
75
|
|
67
|
-
|
68
|
-
#
|
69
|
-
# Add a link to the current sitemap.
|
70
|
-
def add_link(link)
|
71
|
-
unless self.sitemap << link
|
72
|
-
new_sitemap
|
73
|
-
self.sitemap << link
|
74
|
-
end
|
76
|
+
yield self
|
75
77
|
end
|
76
78
|
|
77
|
-
# Add
|
78
|
-
#
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
self.sitemap = SitemapGenerator::Builder::SitemapFile.new(public_path, new_sitemap_path, default_host)
|
88
|
-
end
|
89
|
-
|
90
|
-
# Mark the sitemap as complete and add it to the sitemap index
|
91
|
-
unless self.sitemap.empty?
|
92
|
-
self.sitemap.finalize!
|
93
|
-
self.sitemap_index << Link.generate(self.sitemap)
|
94
|
-
self.sitemaps << self.sitemap
|
95
|
-
show_progress(self.sitemap) if verbose
|
96
|
-
|
79
|
+
# Add a link to a Sitemap. If a new Sitemap is required, one will be created for
|
80
|
+
# you.
|
81
|
+
def add(link, options={})
|
82
|
+
begin
|
83
|
+
self.sitemap.add(link, options)
|
84
|
+
rescue SitemapGenerator::SitemapError => e
|
85
|
+
if e.is_a?(SitemapGenerator::SitemapFullError)
|
86
|
+
self.sitemap_index.add(self.sitemap)
|
87
|
+
puts self.sitemap.summary if verbose
|
88
|
+
end
|
97
89
|
self.sitemap = SitemapGenerator::Builder::SitemapFile.new(public_path, new_sitemap_path, default_host)
|
90
|
+
retry
|
98
91
|
end
|
99
92
|
end
|
100
93
|
|
101
|
-
# Report progress line.
|
102
|
-
def show_progress(sitemap)
|
103
|
-
uncompressed_size = number_to_human_size(sitemap.filesize)
|
104
|
-
compressed_size = number_to_human_size(File.size?(sitemap.full_path))
|
105
|
-
puts "+ #{sitemap.sitemap_path} #{sitemap.link_count} links / #{uncompressed_size} / #{compressed_size} gzipped"
|
106
|
-
end
|
107
|
-
|
108
|
-
# Finalize all sitemap files
|
109
|
-
def finalize!
|
110
|
-
new_sitemap
|
111
|
-
self.sitemap_index.finalize!
|
112
|
-
end
|
113
|
-
|
114
94
|
# Ping search engines.
|
115
95
|
#
|
116
96
|
# @see http://en.wikipedia.org/wiki/Sitemap_index
|
@@ -149,18 +129,17 @@ module SitemapGenerator
|
|
149
129
|
end
|
150
130
|
end
|
151
131
|
|
152
|
-
|
153
|
-
|
154
|
-
def add_default_links
|
155
|
-
self.sitemap << Link.generate('/', :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
|
156
|
-
self.sitemap << Link.generate(self.sitemap_index, :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
|
132
|
+
def link_count
|
133
|
+
self.sitemap_index.total_link_count
|
157
134
|
end
|
135
|
+
|
136
|
+
protected
|
158
137
|
|
159
138
|
# Return the current sitemap filename with index.
|
160
139
|
#
|
161
140
|
# The index depends on the length of the <tt>sitemaps</tt> array.
|
162
141
|
def new_sitemap_path
|
163
|
-
File.join(self.sitemaps_path || '', "sitemap#{self.sitemaps.length + 1}.xml.gz")
|
142
|
+
File.join(self.sitemaps_path || '', "sitemap#{self.sitemap_index.sitemaps.length + 1}.xml.gz")
|
164
143
|
end
|
165
144
|
|
166
145
|
# Return the current sitemap index filename.
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sitemap_generator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 31
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 1
|
8
|
-
- 1
|
9
|
-
- 1
|
10
|
-
version: 1.1.1
|
8
|
+
- 2
|
9
|
+
- 0
|
10
|
+
version: 1.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Karl Varga
|
@@ -16,7 +16,7 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2010-
|
19
|
+
date: 2010-10-14 00:00:00 -07:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|
@@ -76,14 +76,12 @@ files:
|
|
76
76
|
- VERSION
|
77
77
|
- lib/sitemap_generator.rb
|
78
78
|
- lib/sitemap_generator/builder.rb
|
79
|
-
- lib/sitemap_generator/builder/helper.rb
|
80
79
|
- lib/sitemap_generator/builder/sitemap_file.rb
|
81
|
-
- lib/sitemap_generator/builder/sitemap_file.rb.orig
|
82
80
|
- lib/sitemap_generator/builder/sitemap_index_file.rb
|
81
|
+
- lib/sitemap_generator/builder/sitemap_index_url.rb
|
82
|
+
- lib/sitemap_generator/builder/sitemap_url.rb
|
83
83
|
- lib/sitemap_generator/interpreter.rb
|
84
|
-
- lib/sitemap_generator/link.rb
|
85
84
|
- lib/sitemap_generator/link_set.rb
|
86
|
-
- lib/sitemap_generator/mapper.rb
|
87
85
|
- lib/sitemap_generator/railtie.rb
|
88
86
|
- lib/sitemap_generator/tasks.rb
|
89
87
|
- lib/sitemap_generator/templates.rb
|
@@ -1,133 +0,0 @@
|
|
1
|
-
require 'sitemap_generator/builder/helper'
|
2
|
-
require 'builder'
|
3
|
-
require 'zlib'
|
4
|
-
|
5
|
-
module SitemapGenerator
|
6
|
-
module Builder
|
7
|
-
class SitemapFile
|
8
|
-
include SitemapGenerator::Builder::Helper
|
9
|
-
|
10
|
-
attr_accessor :sitemap_path, :public_path, :filesize, :link_count, :hostname
|
11
|
-
|
12
|
-
# <tt>public_path</tt> full path of the directory to write sitemaps in.
|
13
|
-
# Usually your Rails <tt>public/</tt> directory.
|
14
|
-
#
|
15
|
-
# <tt>sitemap_path</tt> relative path including filename of the sitemap
|
16
|
-
# file relative to <tt>public_path</tt>
|
17
|
-
#
|
18
|
-
# <tt>hostname</tt> hostname including protocol to use in all links
|
19
|
-
# e.g. http://en.google.ca
|
20
|
-
def initialize(public_path, sitemap_path, hostname)
|
21
|
-
self.sitemap_path = sitemap_path
|
22
|
-
self.public_path = public_path
|
23
|
-
self.hostname = hostname
|
24
|
-
self.link_count = 0
|
25
|
-
|
26
|
-
@xml_content = '' # XML urlset content
|
27
|
-
@xml_wrapper_start = <<-HTML
|
28
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
29
|
-
<urlset
|
30
|
-
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
31
|
-
xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"
|
32
|
-
xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
|
33
|
-
http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
|
34
|
-
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
35
|
-
>
|
36
|
-
HTML
|
37
|
-
<<<<<<< HEAD
|
38
|
-
@xml_wrapper_start.gsub!(/\s+/, ' ').gsub!(/ *> */, '>').strip!
|
39
|
-
=======
|
40
|
-
@xml_wrapper_start.gsub!(/\s+/, ' ').gsub!(/ *> */, '>').gsub!(' <','<')
|
41
|
-
>>>>>>> 7f3f574... removing first space on xml start. ' <?xml...' becames '<?xml...>' it's allow firefox xml view
|
42
|
-
@xml_wrapper_end = %q[</urlset>]
|
43
|
-
self.filesize = bytesize(@xml_wrapper_start) + bytesize(@xml_wrapper_end)
|
44
|
-
end
|
45
|
-
|
46
|
-
def lastmod
|
47
|
-
File.mtime(self.full_path) rescue nil
|
48
|
-
end
|
49
|
-
|
50
|
-
def empty?
|
51
|
-
self.link_count == 0
|
52
|
-
end
|
53
|
-
|
54
|
-
def full_url
|
55
|
-
URI.join(self.hostname, self.sitemap_path).to_s
|
56
|
-
end
|
57
|
-
|
58
|
-
def full_path
|
59
|
-
@full_path ||= File.join(self.public_path, self.sitemap_path)
|
60
|
-
end
|
61
|
-
|
62
|
-
# Return a boolean indicating whether the sitemap file can fit another link
|
63
|
-
# of <tt>bytes</tt> bytes in size.
|
64
|
-
def file_can_fit?(bytes)
|
65
|
-
(self.filesize + bytes) < SitemapGenerator::MAX_SITEMAP_FILESIZE && self.link_count < SitemapGenerator::MAX_SITEMAP_LINKS
|
66
|
-
end
|
67
|
-
|
68
|
-
# Add a link to the sitemap file and return a boolean indicating whether the
|
69
|
-
# link was added.
|
70
|
-
#
|
71
|
-
# If a link cannot be added, the file is too large or the link limit has been reached.
|
72
|
-
def add_link(link)
|
73
|
-
xml = build_xml(::Builder::XmlMarkup.new, link)
|
74
|
-
unless file_can_fit?(bytesize(xml))
|
75
|
-
self.finalize!
|
76
|
-
return false
|
77
|
-
end
|
78
|
-
|
79
|
-
@xml_content << xml
|
80
|
-
self.filesize += bytesize(xml)
|
81
|
-
self.link_count += 1
|
82
|
-
true
|
83
|
-
end
|
84
|
-
alias_method :<<, :add_link
|
85
|
-
|
86
|
-
# Return XML as a String
|
87
|
-
def build_xml(builder, link)
|
88
|
-
builder.url do
|
89
|
-
builder.loc link[:loc]
|
90
|
-
builder.lastmod w3c_date(link[:lastmod]) if link[:lastmod]
|
91
|
-
builder.changefreq link[:changefreq] if link[:changefreq]
|
92
|
-
builder.priority link[:priority] if link[:priority]
|
93
|
-
|
94
|
-
unless link[:images].blank?
|
95
|
-
link[:images].each do |image|
|
96
|
-
builder.image:image do
|
97
|
-
builder.image :loc, image[:loc]
|
98
|
-
builder.image :caption, image[:caption] if image[:caption]
|
99
|
-
builder.image :geo_location, image[:geo_location] if image[:geo_location]
|
100
|
-
builder.image :title, image[:title] if image[:title]
|
101
|
-
builder.image :license, image[:license] if image[:license]
|
102
|
-
end
|
103
|
-
end
|
104
|
-
end
|
105
|
-
end
|
106
|
-
builder << ''
|
107
|
-
end
|
108
|
-
|
109
|
-
# Insert the content into the XML "wrapper" and write and close the file.
|
110
|
-
#
|
111
|
-
# All the xml content in the instance is cleared, but attributes like
|
112
|
-
# <tt>filesize</tt> are still available.
|
113
|
-
def finalize!
|
114
|
-
return if self.frozen?
|
115
|
-
|
116
|
-
open(self.full_path, 'wb') do |file|
|
117
|
-
gz = Zlib::GzipWriter.new(file)
|
118
|
-
gz.write @xml_wrapper_start
|
119
|
-
gz.write @xml_content
|
120
|
-
gz.write @xml_wrapper_end
|
121
|
-
gz.close
|
122
|
-
end
|
123
|
-
@xml_content = @xml_wrapper_start = @xml_wrapper_end = ''
|
124
|
-
self.freeze
|
125
|
-
end
|
126
|
-
|
127
|
-
# Return the bytesize length of the string
|
128
|
-
def bytesize(string)
|
129
|
-
string.respond_to?(:bytesize) ? string.bytesize : string.length
|
130
|
-
end
|
131
|
-
end
|
132
|
-
end
|
133
|
-
end
|
@@ -1,37 +0,0 @@
|
|
1
|
-
module SitemapGenerator
|
2
|
-
module Link
|
3
|
-
extend self
|
4
|
-
|
5
|
-
# Return a Hash of options suitable to pass to a SitemapGenerator::Builder::SitemapFile instance.
|
6
|
-
def generate(path, options = {})
|
7
|
-
if path.is_a?(SitemapGenerator::Builder::SitemapFile)
|
8
|
-
options.reverse_merge!(:host => path.hostname, :lastmod => path.lastmod)
|
9
|
-
path = path.sitemap_path
|
10
|
-
end
|
11
|
-
|
12
|
-
options.assert_valid_keys(:priority, :changefreq, :lastmod, :host, :images, :video)
|
13
|
-
options.reverse_merge!(:priority => 0.5, :changefreq => 'weekly', :lastmod => Time.now, :host => Sitemap.default_host, :images => [])
|
14
|
-
{
|
15
|
-
:path => path,
|
16
|
-
:priority => options[:priority],
|
17
|
-
:changefreq => options[:changefreq],
|
18
|
-
:lastmod => options[:lastmod],
|
19
|
-
:host => options[:host],
|
20
|
-
:loc => URI.join(options[:host], path).to_s,
|
21
|
-
:images => prepare_images(options[:images], options[:host]),
|
22
|
-
:video => options[:video]
|
23
|
-
}
|
24
|
-
end
|
25
|
-
|
26
|
-
# Return an Array of image option Hashes suitable to be parsed by SitemapGenerator::Builder::SitemapFile
|
27
|
-
def prepare_images(images, host)
|
28
|
-
images.delete_if { |key,value| key[:loc] == nil }
|
29
|
-
images.each do |r|
|
30
|
-
r.assert_valid_keys(:loc, :caption, :geo_location, :title, :license)
|
31
|
-
r[:loc] = URI.join(host, r[:loc]).to_s
|
32
|
-
end
|
33
|
-
images[0..(SitemapGenerator::MAX_SITEMAP_IMAGES-1)]
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
@@ -1,16 +0,0 @@
|
|
1
|
-
module SitemapGenerator
|
2
|
-
# Generator instances are used to build links.
|
3
|
-
# The object passed to the add_links block in config/sitemap.rb is a Generator instance.
|
4
|
-
class Mapper
|
5
|
-
attr_accessor :set
|
6
|
-
|
7
|
-
def initialize(set)
|
8
|
-
@set = set
|
9
|
-
end
|
10
|
-
|
11
|
-
def add(loc, options = {})
|
12
|
-
set.add_link Link.generate(loc, options)
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|