sitemap_generator_ftbpro 5.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +35 -0
- data/MIT-LICENSE +20 -0
- data/README.md +1139 -0
- data/Rakefile +43 -0
- data/VERSION +1 -0
- data/lib/capistrano/sitemap_generator.rb +1 -0
- data/lib/capistrano/tasks/sitemap_generator.cap +36 -0
- data/lib/sitemap_generator/adapters/file_adapter.rb +43 -0
- data/lib/sitemap_generator/adapters/fog_adapter.rb +28 -0
- data/lib/sitemap_generator/adapters/s3_adapter.rb +41 -0
- data/lib/sitemap_generator/adapters/wave_adapter.rb +21 -0
- data/lib/sitemap_generator/adapters.rb +0 -0
- data/lib/sitemap_generator/application.rb +49 -0
- data/lib/sitemap_generator/builder/sitemap_file.rb +171 -0
- data/lib/sitemap_generator/builder/sitemap_index_file.rb +149 -0
- data/lib/sitemap_generator/builder/sitemap_index_url.rb +28 -0
- data/lib/sitemap_generator/builder/sitemap_url.rb +250 -0
- data/lib/sitemap_generator/builder.rb +8 -0
- data/lib/sitemap_generator/core_ext/big_decimal.rb +45 -0
- data/lib/sitemap_generator/core_ext/numeric.rb +48 -0
- data/lib/sitemap_generator/core_ext.rb +3 -0
- data/lib/sitemap_generator/helpers/number_helper.rb +237 -0
- data/lib/sitemap_generator/interpreter.rb +80 -0
- data/lib/sitemap_generator/link_set.rb +665 -0
- data/lib/sitemap_generator/railtie.rb +7 -0
- data/lib/sitemap_generator/sitemap_location.rb +192 -0
- data/lib/sitemap_generator/sitemap_namer.rb +75 -0
- data/lib/sitemap_generator/tasks.rb +53 -0
- data/lib/sitemap_generator/templates.rb +41 -0
- data/lib/sitemap_generator/utilities.rb +181 -0
- data/lib/sitemap_generator.rb +82 -0
- data/lib/tasks/sitemap_generator_tasks.rake +1 -0
- data/rails/install.rb +2 -0
- data/rails/uninstall.rb +2 -0
- data/spec/blueprint.rb +15 -0
- data/spec/files/sitemap.create.rb +12 -0
- data/spec/files/sitemap.groups.rb +49 -0
- data/spec/sitemap_generator/adapters/s3_adapter_spec.rb +23 -0
- data/spec/sitemap_generator/alternate_sitemap_spec.rb +79 -0
- data/spec/sitemap_generator/application_spec.rb +69 -0
- data/spec/sitemap_generator/builder/sitemap_file_spec.rb +110 -0
- data/spec/sitemap_generator/builder/sitemap_index_file_spec.rb +124 -0
- data/spec/sitemap_generator/builder/sitemap_index_url_spec.rb +28 -0
- data/spec/sitemap_generator/builder/sitemap_url_spec.rb +186 -0
- data/spec/sitemap_generator/core_ext/bigdecimal_spec.rb +20 -0
- data/spec/sitemap_generator/core_ext/numeric_spec.rb +43 -0
- data/spec/sitemap_generator/file_adaptor_spec.rb +20 -0
- data/spec/sitemap_generator/geo_sitemap_spec.rb +30 -0
- data/spec/sitemap_generator/helpers/number_helper_spec.rb +196 -0
- data/spec/sitemap_generator/interpreter_spec.rb +90 -0
- data/spec/sitemap_generator/link_set_spec.rb +864 -0
- data/spec/sitemap_generator/mobile_sitemap_spec.rb +27 -0
- data/spec/sitemap_generator/news_sitemap_spec.rb +42 -0
- data/spec/sitemap_generator/pagemap_sitemap_spec.rb +57 -0
- data/spec/sitemap_generator/sitemap_generator_spec.rb +582 -0
- data/spec/sitemap_generator/sitemap_groups_spec.rb +144 -0
- data/spec/sitemap_generator/sitemap_location_spec.rb +210 -0
- data/spec/sitemap_generator/sitemap_namer_spec.rb +96 -0
- data/spec/sitemap_generator/templates_spec.rb +24 -0
- data/spec/sitemap_generator/utilities/existence_spec.rb +26 -0
- data/spec/sitemap_generator/utilities/hash_spec.rb +57 -0
- data/spec/sitemap_generator/utilities/rounding_spec.rb +31 -0
- data/spec/sitemap_generator/utilities_spec.rb +101 -0
- data/spec/sitemap_generator/video_sitemap_spec.rb +117 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/support/file_macros.rb +39 -0
- data/spec/support/schemas/siteindex.xsd +73 -0
- data/spec/support/schemas/sitemap-geo.xsd +41 -0
- data/spec/support/schemas/sitemap-mobile.xsd +32 -0
- data/spec/support/schemas/sitemap-news.xsd +159 -0
- data/spec/support/schemas/sitemap-pagemap.xsd +97 -0
- data/spec/support/schemas/sitemap-video.xsd +643 -0
- data/spec/support/schemas/sitemap.xsd +115 -0
- data/spec/support/xml_macros.rb +67 -0
- data/templates/sitemap.rb +27 -0
- metadata +226 -0
data/Rakefile
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'bundler/setup'
Bundler.require
require "rspec/core/rake_task"

# Running `rake` with no arguments runs the spec suite.
desc 'Default: run spec tests.'
task :default => :spec

# The :spec task runs everything found under spec/sitemap_generator and
# prints full backtraces on failure.
RSpec::Core::RakeTask.new(:spec) do |t|
  t.pattern = Dir.glob(['spec/sitemap_generator/**/*'])
  t.rspec_opts = ['--backtrace']
end

#
# Helpers
#

# Gem name: the basename (text before the first dot) of the first *.gemspec
# file found in the current directory. Memoized.
def name
  @name ||= Dir['*.gemspec'].first.split('.').first
end

# Version string read from the VERSION file, without the trailing newline.
def version
  File.read('VERSION').chomp
end

# File name of the gemspec for this gem.
def gemspec_file
  "#{name}.gemspec"
end

# File name of the built gem package for the current version.
def gem_file
  "#{name}-#{version}.gem"
end

#
# Release Tasks
# @see https://github.com/mojombo/rakegem
#

desc "Create tag v#{version}, build the gem and push to Git"
task :release => :build do
  # Refuse to release from any branch other than master.
  on_master = `git branch` =~ /^\* master$/
  if !on_master
    puts "You must be on the master branch to release!"
    exit!
  end
  sh "git tag v#{version}"
  sh "git push origin master --tags"
end

desc "Build #{gem_file} into the pkg/ directory"
task :build do
  [
    "mkdir -p pkg",
    "gem build #{gemspec_file}",
    "mv #{gem_file} pkg",
    "bundle --local"
  ].each { |command| sh command }
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
5.0.4
|
@@ -0,0 +1 @@
|
|
1
|
+
# Load the Capistrano task definitions that live in the sibling tasks/ directory.
load File.expand_path('../tasks/sitemap_generator.cap', __FILE__)
|
@@ -0,0 +1,36 @@
|
|
1
|
+
namespace :deploy do
  namespace :sitemap do
    # Each entry becomes a `deploy:sitemap:<action>` task that runs the
    # matching `sitemap:<action>` rake task on the web roles, inside the
    # release path, with RAILS_ENV taken from the :rails_env setting.
    {
      :refresh => 'Create sitemap and ping search engines',
      :create  => 'Create sitemap without pinging search engines',
      :clean   => 'Clean up sitemaps in sitemap_generator path'
    }.each do |action, description|
      desc description
      task action do
        on roles :web do
          within release_path do
            with rails_env: fetch(:rails_env) do
              execute :rake, "sitemap:#{action}"
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module SitemapGenerator
  # Class for writing out data to a file.
  class FileAdapter

    # Write data to a file.
    #
    # @param location - object giving the target: `location.directory` is the
    #   directory that must exist and `location.path` is the full path of the
    #   file. If the directory does not exist it is created (including
    #   parents). If the path ends with `.gz` the data is compressed before
    #   being written out; otherwise it is written unchanged.
    # @param raw_data - data to write to the file.
    # @raise [SitemapError] if `location.directory` exists but is not a directory.
    def write(location, raw_data)
      # Ensure that the directory exists
      dir = location.directory
      if !File.exist?(dir) # File.exists? was removed in Ruby 3.2
        FileUtils.mkdir_p(dir)
      elsif !File.directory?(dir)
        raise SitemapError.new("#{dir} should be a directory!")
      end

      # File.open rather than Kernel#open: the latter would spawn a
      # subprocess for a path that starts with "|".
      stream = File.open(location.path, 'wb')
      # Literal ".gz" suffix check; the old /.gz$/ regexp also matched names
      # such as "sitemap.xmlgz" and matched at any line end.
      if location.path.to_s.end_with?('.gz')
        gzip(stream, raw_data)
      else
        plain(stream, raw_data)
      end
    end

    # Write `data` to a stream, passing the data through a GzipWriter
    # to compress it. The stream is always closed, even if writing fails.
    # Returns the (closed) stream.
    def gzip(stream, data)
      gz = Zlib::GzipWriter.new(stream)
      gz.write data
      stream
    ensure
      if gz
        # Closing the writer flushes the gzip trailer and closes `stream`.
        gz.close unless gz.closed?
      else
        stream.close
      end
    end

    # Write `data` to a stream as is. The stream is always closed, even if
    # writing fails. Returns nil.
    def plain(stream, data)
      stream.write data
      nil
    ensure
      stream.close
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
begin
|
2
|
+
require 'fog'
|
3
|
+
rescue LoadError
|
4
|
+
raise LoadError.new("Missing required 'fog'. Please 'gem install fog' and require it in your application.")
|
5
|
+
end
|
6
|
+
|
7
|
+
module SitemapGenerator
  # Adapter that writes the sitemap to the local filesystem and then uploads
  # the written file through Fog::Storage.
  class FogAdapter

    # @param opts [Hash]
    #   :fog_credentials - passed as-is to Fog::Storage.new
    #   :fog_directory   - key of the remote directory to upload into
    def initialize(opts = {})
      @fog_credentials = opts[:fog_credentials]
      @fog_directory = opts[:fog_directory]
    end

    # Call with a SitemapLocation and string data.
    #
    # The data is first written locally with SitemapGenerator::FileAdapter,
    # then the resulting file is uploaded under the key
    # `location.path_in_public` and marked public.
    # Returns whatever `directory.files.create` returns.
    def write(location, raw_data)
      SitemapGenerator::FileAdapter.new.write(location, raw_data)

      storage = Fog::Storage.new(@fog_credentials)
      directory = storage.directories.new(:key => @fog_directory)
      # Block form so the local file handle is closed once the upload call
      # returns (previously the handle was never closed).
      File.open(location.path) do |file|
        directory.files.create(
          :key    => location.path_in_public,
          :body   => file,
          :public => true
        )
      end
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
begin
|
2
|
+
require 'fog'
|
3
|
+
rescue LoadError
|
4
|
+
raise LoadError.new("Missing required 'fog'. Please 'gem install fog' and require it in your application.")
|
5
|
+
end
|
6
|
+
|
7
|
+
module SitemapGenerator
  # Adapter that writes the sitemap to the local filesystem and then uploads
  # the written file through Fog::Storage using AWS-style credentials.
  class S3Adapter

    # Every option falls back to the environment variable of the same
    # (upper-cased) name when it is not given.
    #
    # @param opts [Hash]
    #   :aws_access_key_id, :aws_secret_access_key, :fog_provider,
    #   :fog_directory, :fog_region, :fog_path_style
    def initialize(opts = {})
      @aws_access_key_id = opts[:aws_access_key_id] || ENV['AWS_ACCESS_KEY_ID']
      @aws_secret_access_key = opts[:aws_secret_access_key] || ENV['AWS_SECRET_ACCESS_KEY']
      @fog_provider = opts[:fog_provider] || ENV['FOG_PROVIDER']
      @fog_directory = opts[:fog_directory] || ENV['FOG_DIRECTORY']
      @fog_region = opts[:fog_region] || ENV['FOG_REGION']
      @fog_path_style = opts[:fog_path_style] || ENV['FOG_PATH_STYLE']
    end

    # Call with a SitemapLocation and string data.
    #
    # The data is first written locally with SitemapGenerator::FileAdapter,
    # then the resulting file is uploaded under the key
    # `location.path_in_public` and marked public.
    # Returns whatever `directory.files.create` returns.
    def write(location, raw_data)
      SitemapGenerator::FileAdapter.new.write(location, raw_data)

      credentials = {
        :aws_access_key_id     => @aws_access_key_id,
        :aws_secret_access_key => @aws_secret_access_key,
        :provider              => @fog_provider,
      }
      # Region and path style are only sent when configured.
      credentials[:region] = @fog_region if @fog_region
      credentials[:path_style] = @fog_path_style if @fog_path_style

      storage = Fog::Storage.new(credentials)
      directory = storage.directories.new(:key => @fog_directory)
      # Block form so the local file handle is closed once the upload call
      # returns (previously the handle was never closed).
      File.open(location.path) do |file|
        directory.files.create(
          :key    => location.path_in_public,
          :body   => file,
          :public => true
        )
      end
    end

  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
begin
|
2
|
+
require 'carrierwave'
|
3
|
+
rescue LoadError
|
4
|
+
raise LoadError.new("Missing required 'carrierwave'. Please 'gem install carrierwave' and require it in your application.")
|
5
|
+
end
|
6
|
+
|
7
|
+
module SitemapGenerator
  # Adapter that writes the sitemap to the local filesystem and then stores
  # the written file through this CarrierWave uploader.
  class WaveAdapter < ::CarrierWave::Uploader::Base
    attr_accessor :store_dir

    # Call with a SitemapLocation and string data.
    #
    # The data is first written locally with SitemapGenerator::FileAdapter.
    # When `location.path_in_public` has a directory component, that directory
    # becomes the uploader's `store_dir`. The local file is then handed to
    # `store!`, whose result is returned.
    def write(location, raw_data)
      SitemapGenerator::FileAdapter.new.write(location, raw_data)
      directory = File.dirname(location.path_in_public)
      self.store_dir = directory unless directory == '.'
      # File.open in block form: avoids Kernel#open (which treats a leading
      # "|" as a command) and closes the handle once store! has returned.
      # NOTE(review): assumes store! finishes reading the file before it
      # returns - confirm against the configured CarrierWave storage.
      File.open(location.path, 'rb') { |file| store!(file) }
    end
  end
end
|
File without changes
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
module SitemapGenerator
  # Small helper describing the host application (Rails or plain Ruby).
  class Application
    # Returns a boolean indicating whether the `Rails` constant is defined.
    #
    # @return [Boolean]
    def rails?
      defined?(Rails) ? true : false
    end

    # Returns a boolean indicating whether this environment is Rails 3
    # or later (the check is `Rails.version.to_f >= 3`).
    #
    # @return [Boolean] false when Rails is absent or the version cannot be read
    def rails3?
      rails? && Rails.version.to_f >= 3
    rescue
      false # Rails.version defined in 2.1.0
    end

    # Root directory of the application: the Rails root when one is
    # available, otherwise the current working directory.
    #
    # @return [Pathname]
    def root
      Pathname.new(rails_root || Dir.getwd)
    end

    protected

    # Returns the root of the Rails application,
    # if this is running in a Rails context.
    # Returns `nil` if no such root is defined.
    #
    # @return [String, nil]
    # @raise [RuntimeError] if `Rails.root` is defined but returns nil/false
    def rails_root
      if defined?(::Rails.root)
        return ::Rails.root.to_s if ::Rails.root
        raise "ERROR: Rails.root is nil!"
      end
      # Fall back to the legacy RAILS_ROOT constant.
      return RAILS_ROOT.to_s if defined?(RAILS_ROOT)
      nil
    end

    # Returns the environment of the Rails application,
    # if this is running in a Rails context.
    # Returns `nil` if no such environment is defined.
    #
    # @return [String, nil]
    def rails_env
      return ::Rails.env.to_s if defined?(::Rails.env)
      # Fall back to the legacy RAILS_ENV constant.
      return RAILS_ENV.to_s if defined?(RAILS_ENV)
      nil
    end
  end
end
|
@@ -0,0 +1,171 @@
|
|
1
|
+
require 'zlib'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'sitemap_generator/helpers/number_helper'
|
4
|
+
|
5
|
+
module SitemapGenerator
  module Builder
    #
    # General Usage:
    #
    #   sitemap = SitemapFile.new(location_or_options, schemas)
    #   sitemap.add('/', { ... })    <- add a link to the sitemap
    #   sitemap.finalize!            <- flag the sitemap as finalized to protect it from further modification
    #   sitemap.write                <- write the sitemap file out through its location
    #
    class SitemapFile
      include SitemapGenerator::Helpers::NumberHelper
      attr_reader :link_count, :filesize, :location, :news_count

      # === Arguments
      #
      # * <tt>opts</tt> - a SitemapGenerator::SitemapLocation instance or a Hash of options
      #   from which a SitemapLocation will be created for you.
      # * <tt>schemas</tt> - enumerable of (prefix, namespace URI) pairs; each pair is
      #   emitted as an <tt>xmlns:prefix="uri"</tt> attribute on the <urlset> element.
      def initialize(opts={}, schemas)
        @location = opts.is_a?(Hash) ? SitemapGenerator::SitemapLocation.new(opts) : opts
        @link_count = 0
        @news_count = 0
        @xml_content = '' # XML urlset content
        @schemas = schemas
        # Collapse all whitespace runs and the spaces around '>' so the header
        # is a single compact line. Non-bang methods are used on purpose: the
        # bang variants return nil when nothing changes, which breaks chaining.
        @xml_wrapper_start = all_schemas.gsub(/\s+/, ' ').gsub(/ *> */, '>').strip
        @xml_wrapper_end = %q[</urlset>]
        @filesize = SitemapGenerator::Utilities.bytesize(@xml_wrapper_start) + SitemapGenerator::Utilities.bytesize(@xml_wrapper_end)
        @written = false
        @reserved_name = nil # holds the name reserved from the namer
        @frozen = false # rather than actually freeze, use this boolean
      end

      # Build the XML declaration plus the opening <urlset> tag, including one
      # xmlns attribute per entry in the schemas given to the constructor.
      # The result still contains line breaks; the constructor normalizes it.
      def all_schemas
        xml_start = '<?xml version="1.0" encoding="UTF-8"?>
          <urlset
            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
            xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
              http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
            xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" '
        xml_schemas = @schemas.map { |schema, content| "xmlns:#{schema}=\"#{content}\"\n" }.join(" ")
        xml_end = 'xmlns:xhtml="http://www.w3.org/1999/xhtml" >'
        xml_start + xml_schemas + xml_end
      end

      # If a name has been reserved, use the last modified time from the file.
      # Otherwise return nil. We don't want to prematurely assign a name
      # for this sitemap if one has not yet been reserved, because we may
      # mess up the name-assignment sequence.
      # Any error raised while reading the mtime also yields nil.
      def lastmod
        File.mtime(location.path) if location.reserved_name?
      rescue
        nil
      end

      # Return true if no links have been added yet.
      def empty?
        @link_count == 0
      end

      # Return a boolean indicating whether the sitemap file can fit another link
      # of <tt>bytes</tt> bytes in size. You can also pass a string and the
      # bytesize will be calculated for you.
      def file_can_fit?(bytes)
        bytes = bytes.is_a?(String) ? SitemapGenerator::Utilities.bytesize(bytes) : bytes
        (@filesize + bytes) < SitemapGenerator::MAX_SITEMAP_FILESIZE &&
          @link_count < SitemapGenerator::MAX_SITEMAP_LINKS &&
          @news_count < SitemapGenerator::MAX_SITEMAP_NEWS
      end

      # Add a link to the sitemap file.
      #
      # If a link cannot be added, for example if the file is too large or the link
      # limit has been reached, a SitemapGenerator::SitemapFullError exception is raised.
      # The sitemap itself is left untouched in that case.
      #
      # If the Sitemap has already been finalized a SitemapGenerator::SitemapFinalizedError
      # exception is raised.
      #
      # Return the new link count.
      #
      # Call with:
      #   sitemap_url - a SitemapUrl instance
      #   path, options - a path for the URL and options hash.  For supported options
      #                   see the SitemapGenerator::Builder::SitemapUrl class.
      #
      # The link added to the sitemap will use the host from its location object
      # if no host has been specified. Note that in this case the :host key is
      # written into the options hash that was passed in.
      def add(link, options={})
        raise SitemapGenerator::SitemapFinalizedError if finalized?

        sitemap_url = if link.is_a?(SitemapUrl)
          link
        else
          options[:host] ||= @location.host
          SitemapUrl.new(link, options)
        end

        xml = sitemap_url.to_xml
        raise SitemapGenerator::SitemapFullError if !file_can_fit?(xml)

        @news_count += 1 if sitemap_url.news?

        # Add the XML to the sitemap
        @xml_content << xml
        @filesize += SitemapGenerator::Utilities.bytesize(xml)
        @link_count += 1
      end

      # "Freeze" this object.  Actually just flags it as frozen.
      #
      # A SitemapGenerator::SitemapFinalizedError exception is raised if the Sitemap
      # has already been finalized.
      def finalize!
        raise SitemapGenerator::SitemapFinalizedError if finalized?
        @frozen = true
      end

      # Return true if finalize! has been called on this sitemap.
      def finalized?
        @frozen
      end

      # Write out the sitemap and free up memory.
      #
      # The sitemap is finalized first if it has not been already, and a name
      # is reserved for it. All the xml content in the instance is cleared,
      # but attributes like <tt>filesize</tt> are still available.
      #
      # A SitemapGenerator::SitemapError exception is raised if the file has
      # already been written.
      def write
        raise SitemapGenerator::SitemapError.new("Sitemap already written!") if written?
        finalize! unless finalized?
        reserve_name
        @location.write(@xml_wrapper_start + @xml_content + @xml_wrapper_end, link_count)
        @xml_content = @xml_wrapper_start = @xml_wrapper_end = ''
        @written = true
      end

      # Return true if this file has been written out to disk
      def written?
        @written
      end

      # Reserve a name from the namer unless one has already been reserved.
      # Safe to call more than once.
      def reserve_name
        @reserved_name ||= @location.reserve_name
      end

      # Return a boolean indicating whether a name has been reserved
      def reserved_name?
        !!@reserved_name
      end

      # Return a new instance of the sitemap file with the same options,
      # and the next name in the sequence.
      def new
        location = @location.dup
        location.delete(:filename) if location.namer
        # The schemas must be passed along explicitly: with the signature
        # initialize(opts={}, schemas) a lone argument is bound to `schemas`,
        # which would turn the location into the schema list. Subclasses with
        # a one-argument constructor never set @schemas, so they keep the
        # single-argument call.
        if @schemas.nil?
          self.class.new(location)
        else
          self.class.new(location, @schemas)
        end
      end
    end
  end
end
|
171
|
+
|
@@ -0,0 +1,149 @@
|
|
1
|
+
module SitemapGenerator
  module Builder
    # A sitemap index: collects links to sitemap files and decides whether an
    # index file actually needs to be written (see #create_index?).
    class SitemapIndexFile < SitemapFile

      # === Options
      #
      # * <tt>location</tt> - a SitemapGenerator::SitemapIndexLocation instance or a Hash of options
      #   from which a SitemapIndexLocation will be created for you.
      def initialize(opts={})
        @location = opts.is_a?(Hash) ? SitemapGenerator::SitemapIndexLocation.new(opts) : opts
        @link_count = 0
        # The inherited SitemapFile#add increments @news_count for news URLs;
        # initialise it so that path can never hit nil.
        @news_count = 0
        @sitemaps_link_count = 0
        @xml_content = '' # XML urlset content
        # Non-bang gsub/strip: the bang variants return nil when nothing
        # changes, which would break the chain.
        @xml_wrapper_start = <<-HTML.gsub(/\s+/, ' ').gsub(/ *> */, '>').strip
          <?xml version="1.0" encoding="UTF-8"?>
            <sitemapindex
              xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
              xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
                http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd"
              xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
            >
        HTML
        @xml_wrapper_end = %q[</sitemapindex>]
        @filesize = SitemapGenerator::Utilities.bytesize(@xml_wrapper_start) + SitemapGenerator::Utilities.bytesize(@xml_wrapper_end)
        @written = false
        @reserved_name = nil # holds the name reserved from the namer
        @frozen = false # rather than actually freeze, use this boolean
        @first_sitemap = nil  # reference to the first thing added to this index
        # Store the URL of the first sitemap added because if create_index is
        # false this is the "index" URL
        @first_sitemap_url = nil
      end

      # Finalize sitemaps as they are added to the index.
      # If it's the first sitemap, finalize it but don't
      # write it out, because we don't yet know if we need an index.  If it's
      # the second sitemap, we know we need an index, so reserve a name for the
      # index, and go and write out the first sitemap.  If it's the third or
      # greater sitemap, just finalize and write it out as usual, nothing more
      # needs to be done.
      #
      # If a link is being added to the index manually as a string, then we
      # can assume that the index is required (unless create_index is false of course).
      # This seems like the logical thing to do.
      alias_method :super_add, :add
      def add(link, options={})
        if link.is_a?(SitemapFile)
          file = link
          @sitemaps_link_count += file.link_count
          file.finalize! unless file.finalized?

          # First link.  If it's a SitemapFile store a reference to it and the options
          # so that we can create a URL from it later.  We can't create the URL yet
          # because doing so fixes the sitemap file's name, and we have to wait to see
          # if we have more than one link in the index before we can know who gets the
          # first name (the index, or the sitemap).  If the item is not a SitemapFile,
          # then it has been manually added and we can be sure that the user intends
          # for there to be an index.
          if @link_count == 0
            @first_sitemap = SitemapGenerator::Builder::LinkHolder.new(file, options)
            @link_count += 1      # pretend it's added, but don't add it yet
          else
            # need an index so make sure name is reserved and first sitemap is written out
            reserve_name unless @location.create_index == false
            write_first_sitemap
            file.write
            super(SitemapGenerator::Builder::SitemapIndexUrl.new(file, options))
          end
        else
          # A link is being added manually.  Obviously the user wants an index,
          # so force index creation unless create_index is explicitly false.
          unless @location.create_index == false
            @create_index = true
            reserve_name
          end

          # Use the host from the location if none provided
          options[:host] ||= @location.host
          super(SitemapGenerator::Builder::SitemapIndexUrl.new(link, options))
        end
      end

      # Return a boolean indicating whether the sitemap file can fit another link
      # of <tt>bytes</tt> bytes in size.  You can also pass a string and the
      # bytesize will be calculated for you.
      def file_can_fit?(bytes)
        bytes = bytes.is_a?(String) ? SitemapGenerator::Utilities.bytesize(bytes) : bytes
        (@filesize + bytes) < SitemapGenerator::MAX_SITEMAP_FILESIZE && @link_count < SitemapGenerator::MAX_SITEMAP_FILES
      end

      # Return the total number of links in all sitemaps referenced by this index file
      def total_link_count
        @sitemaps_link_count
      end

      # Return a one-line summary of the number of links and sitemaps.
      #
      # === Options
      #
      # * <tt>:time_taken</tt> - optional duration in seconds; when given it is
      #   appended to the summary formatted as minutes and seconds.
      #
      # Always returns the summary String (previously nil was returned when
      # <tt>:time_taken</tt> was omitted).
      def stats_summary(opts={})
        str = "Sitemap stats: #{number_with_delimiter(@sitemaps_link_count)} links / #{@link_count} sitemaps"
        str += " / %dm%02ds" % opts[:time_taken].divmod(60) if opts[:time_taken]
        str
      end

      # Flag the index as finalized, reserving the index name if an index is
      # needed and making sure the first sitemap has been written out.
      #
      # A SitemapGenerator::SitemapFinalizedError exception is raised if the
      # index has already been finalized.
      def finalize!
        raise SitemapGenerator::SitemapFinalizedError if finalized?
        reserve_name if create_index?
        write_first_sitemap
        @frozen = true
      end

      # Write out the index if an index is needed
      def write
        super if create_index?
      end

      # Whether or not we need to create an index file.  True if create_index is true
      # or if create_index is :auto and we have more than one link in the index.
      # If a link is added manually and create_index is not false, we force index
      # creation because they obviously intend for there to be an index.  False otherwise.
      def create_index?
        @create_index ||
          @location.create_index == true ||
          (@location.create_index == :auto && @link_count > 1)
      end

      # Return the index file URL.  If create_index is true, this is the URL
      # of the actual index file.  If create_index is false, this is the URL
      # of the first sitemap that was written out.  Only call this method
      # *after* the files have been finalized.
      def index_url
        if create_index? || !@first_sitemap_url
          @location.url
        else
          @first_sitemap_url
        end
      end

      protected

      # Make sure the first sitemap has been written out and added to the index
      def write_first_sitemap
        if @first_sitemap
          @first_sitemap.link.write unless @first_sitemap.link.written?
          super_add(SitemapGenerator::Builder::SitemapIndexUrl.new(@first_sitemap.link, @first_sitemap.options))
          @link_count -= 1   # we already counted it, don't count it twice
          # Store the URL because if create_index is false, this is the
          # "index" URL
          @first_sitemap_url = @first_sitemap.link.location.url
          @first_sitemap = nil
        end
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'builder'
|
2
|
+
|
3
|
+
module SitemapGenerator
  module Builder
    # URL entry of a sitemap index, rendered as a <sitemap> element.
    class SitemapIndexUrl < SitemapUrl

      # Accepts either a SitemapIndexFile or anything the parent class accepts.
      #
      # For a SitemapIndexFile the path becomes the file's public path and
      # host, lastmod (now), changefreq ('always') and priority (1.0) are
      # filled in for any option the caller did not supply. Every other kind
      # of argument is handed to the parent constructor unchanged.
      def initialize(path, options={})
        # Bare `super` forwards the original path and options untouched.
        return super unless path.is_a?(SitemapGenerator::Builder::SitemapIndexFile)

        index_file = path
        defaults_applied = SitemapGenerator::Utilities.reverse_merge(
          options,
          :host => index_file.location.host,
          :lastmod => Time.now,
          :changefreq => 'always',
          :priority => 1.0
        )
        super(index_file.location.path_in_public, defaults_applied)
      end

      # Return the URL as XML
      #
      # A new ::Builder::XmlMarkup is created when no builder is passed in.
      def to_xml(builder=nil)
        xml = builder.nil? ? ::Builder::XmlMarkup.new : builder
        xml.sitemap do
          xml.loc self[:loc]
          if self[:lastmod]
            xml.lastmod w3c_date(self[:lastmod])
          end
        end
        xml << '' # force to string
      end
    end
  end
end
|