site_maps 0.0.1.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/workflows/main.yml +45 -0
- data/.gitignore +16 -0
- data/.rspec +1 -0
- data/.rubocop.yml +36 -0
- data/.tool-versions +1 -0
- data/CHANGELOG.md +8 -0
- data/Gemfile +18 -0
- data/Gemfile.lock +134 -0
- data/LICENSE.txt +21 -0
- data/README.md +186 -0
- data/Rakefile +4 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/exec/site_maps +9 -0
- data/lib/site-maps.rb +3 -0
- data/lib/site_maps/adapters/adapter.rb +80 -0
- data/lib/site_maps/adapters/aws_sdk/config.rb +51 -0
- data/lib/site_maps/adapters/aws_sdk/location.rb +9 -0
- data/lib/site_maps/adapters/aws_sdk/storage.rb +52 -0
- data/lib/site_maps/adapters/aws_sdk.rb +31 -0
- data/lib/site_maps/adapters/file_system/config.rb +5 -0
- data/lib/site_maps/adapters/file_system/location.rb +35 -0
- data/lib/site_maps/adapters/file_system/storage.rb +61 -0
- data/lib/site_maps/adapters/file_system.rb +26 -0
- data/lib/site_maps/adapters/noop.rb +18 -0
- data/lib/site_maps/atomic_repository.rb +24 -0
- data/lib/site_maps/builder/link.rb +27 -0
- data/lib/site_maps/builder/normalizer.rb +48 -0
- data/lib/site_maps/builder/sitemap_index/item.rb +35 -0
- data/lib/site_maps/builder/sitemap_index.rb +40 -0
- data/lib/site_maps/builder/url.rb +152 -0
- data/lib/site_maps/builder/url_set.rb +92 -0
- data/lib/site_maps/cli.rb +68 -0
- data/lib/site_maps/configuration.rb +119 -0
- data/lib/site_maps/incremental_location.rb +62 -0
- data/lib/site_maps/notification/bus.rb +90 -0
- data/lib/site_maps/notification/event.rb +50 -0
- data/lib/site_maps/notification/publisher.rb +78 -0
- data/lib/site_maps/notification.rb +36 -0
- data/lib/site_maps/primitives/array.rb +15 -0
- data/lib/site_maps/primitives/output.rb +66 -0
- data/lib/site_maps/primitives/string.rb +43 -0
- data/lib/site_maps/process.rb +29 -0
- data/lib/site_maps/railtie.rb +18 -0
- data/lib/site_maps/runner/event_listener.rb +78 -0
- data/lib/site_maps/runner.rb +136 -0
- data/lib/site_maps/sitemap_builder.rb +75 -0
- data/lib/site_maps/sitemap_reader.rb +56 -0
- data/lib/site_maps/version.rb +5 -0
- data/lib/site_maps.rb +112 -0
- data/site_maps.gemspec +44 -0
- metadata +172 -0
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Storage backend that keeps sitemap files in the S3 bucket exposed by the
# adapter configuration (+config.s3_bucket+).
class SiteMaps::Adapters::AwsSdk::Storage
  attr_reader :config

  # @param config [Object] adapter configuration; this class reads +acl+,
  #   +cache_control+ and +s3_bucket+ from it
  def initialize(config)
    @config = config
  end

  # Uploads the local file at +location.path+ to the bucket object named
  # +location.remote_path+.
  #
  # +:acl+ and +:cache_control+ fall back to the configured values when the
  # caller does not pass them, and +:content_type+ is derived from
  # +location.gzip?+. The +:last_modified+ option (default: +Time.now+) is not
  # forwarded; it is stored as the "given-last-modified" metadata entry.
  #
  # @param location [#path, #remote_path, #gzip?] file to upload
  # @param options [Hash] extra options forwarded to +upload_file+
  # @return [Object] whatever the bucket object's +upload_file+ returns
  def upload(location, **options)
    options[:acl] ||= config.acl if config.acl
    options[:cache_control] ||= config.cache_control if config.cache_control
    options[:content_type] ||= location.gzip? ? "application/gzip" : "application/xml"

    lastmod = options.delete(:last_modified) || Time.now
    # Time#utc converts the receiver in place, which would silently change the
    # caller's object; getutc returns a converted copy instead. Fall back to
    # #utc for time-like objects that do not offer getutc.
    utc_time = lastmod.respond_to?(:getutc) ? lastmod.getutc : lastmod.utc

    # Work on a copy so a metadata hash supplied by the caller is not modified.
    metadata = (options[:metadata] || {}).dup
    metadata["given-last-modified"] = utc_time.strftime("%Y-%m-%dT%H:%M:%S%:z")
    options[:metadata] = metadata

    object(location.remote_path).upload_file(location.path, **options)
  end

  # Fetches the object stored at +location.remote_path+.
  #
  # @param location [#remote_path]
  # @return [Array<String, Hash>] the body and a metadata hash holding
  #   +:content_type+ and, when the "given-last-modified" metadata entry is
  #   present, +:last_modified+ parsed with +Time.parse+
  # @raise [SiteMaps::FileNotFoundError] when S3 reports NoSuchKey
  def read(location)
    obj = object(location.remote_path).get
    metadata = {
      content_type: obj.content_type
    }
    if (raw = obj.metadata["given-last-modified"]) &&
        (time = Time.parse(raw))
      metadata[:last_modified] = time
    end
    [obj.body.read, metadata]
  rescue Aws::S3::Errors::NoSuchKey
    raise SiteMaps::FileNotFoundError, "File not found: #{location.remote_path}"
  end

  # Deletes the object stored at +location.remote_path+.
  #
  # @param location [#remote_path]
  # @raise [SiteMaps::FileNotFoundError] when S3 reports NoSuchKey
  def delete(location)
    object(location.remote_path).delete
  rescue Aws::S3::Errors::NoSuchKey
    raise SiteMaps::FileNotFoundError, "File not found: #{location.remote_path}"
  end

  private

  # Lists the bucket objects whose key starts with +prefix+.
  def list_objects(prefix:)
    config.s3_bucket.objects(
      prefix: prefix
    )
  end

  # Returns the bucket object handle for +remote_path+.
  def object(remote_path)
    config.s3_bucket.object(remote_path)
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SiteMaps::Adapters
  # Adapter that writes each sitemap through the file-system storage first and
  # then uploads the resulting file through the S3 storage. Reads and deletes
  # go to the S3 storage only.
  class AwsSdk < Adapter
    # @param url [String] sitemap URL used to build the Location
    # @param raw_data [String] sitemap content
    # @param options [Hash] forwarded to the S3 upload
    def write(url, raw_data, **options)
      target = Location.new(config.directory, url)
      local_storage.write(target, raw_data)
      s3_storage.upload(target, **options)
    end

    # @param url [String]
    # @return [Object] whatever the S3 storage returns for the location
    def read(url)
      s3_storage.read(Location.new(config.directory, url))
    end

    # @param url [String]
    def delete(url)
      s3_storage.delete(Location.new(config.directory, url))
    end

    private

    # Lazily built file-system storage used to stage files before upload.
    def local_storage
      return @local_storage if @local_storage

      @local_storage = SiteMaps::Adapters::FileSystem::Storage.new
    end

    # Lazily built S3 storage bound to this adapter's configuration.
    def s3_storage
      return @s3_storage if @s3_storage

      @s3_storage = SiteMaps::Adapters::AwsSdk::Storage.new(config)
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Maps a sitemap URL onto a path below a local root directory.
#
# +root+ is the base directory and +url+ the sitemap URL; only the path
# component of the URL is used.
class SiteMaps::Adapters::FileSystem::Location < Struct.new(:root, :url)
  # Anchored to the whole string (\A / \z) rather than to line boundaries, so
  # only a leading slash / a trailing ".gz" of the complete path can match.
  ROOT_RE = %r{\A/}
  GZIP_RE = %r{\.gz\z}

  # @return [String] +root+ joined with the URL path made relative
  def path
    File.join(
      root,
      make_relative(uri.path)
    )
  end

  # @return [String] +root+ joined with the directory part of the URL path
  def directory
    Pathname.new(root).join(remote_relative_dir).to_s
  end

  # @return [Boolean] whether the URL path ends in ".gz"
  def gzip?
    GZIP_RE.match?(uri.path)
  end

  private

  # Parsed form of +url+. Memoized on first use, so a later change of the
  # +url+ member is not picked up.
  def uri
    @uri ||= URI.parse(url)
  end

  # Directory part of the URL path, without the leading slash.
  def remote_relative_dir
    make_relative(File.dirname(uri.path))
  end

  # Strips a single leading slash from +path+.
  def make_relative(path)
    path.sub(ROOT_RE, "")
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Reads, writes and deletes sitemap files on the local file system.
class SiteMaps::Adapters::FileSystem::Storage
  # Writes +raw_data+ to +location.path+, gzip-compressing it when
  # +location.gzip?+ is true. The target directory is created when missing.
  #
  # @param [SiteMaps::Adapters::FileSystem::Location] location
  # @param [String] raw_data
  # @return [void]
  # @raise [SiteMaps::Error] if the path is not a directory
  def write(location, raw_data, **)
    dir = location.directory

    if !File.exist?(dir)
      FileUtils.mkdir_p(dir)
    elsif !File.directory?(dir)
      raise SiteMaps::Error.new("The path #{dir} is not a directory")
    end

    stream = File.open(location.path, "wb")
    begin
      if location.gzip?
        gzip(stream, raw_data)
      else
        plain(stream, raw_data)
      end
    ensure
      # The helpers normally close the stream themselves; this guarantees the
      # descriptor is released even when one of them raises part-way.
      stream.close unless stream.closed?
    end
  end

  # @param [SiteMaps::Adapters::FileSystem::Location] location
  # @return [Array<String, Hash>] The raw data and metadata
  # @raise [SiteMaps::FileNotFoundError] if the file does not exist
  def read(location)
    if location.gzip?
      # Block form closes the reader (and its file) once the data is read.
      [Zlib::GzipReader.open(location.path, &:read), {content_type: "application/gzip"}]
    else
      [File.read(location.path), {content_type: "application/xml"}]
    end
  rescue Zlib::GzipFile::Error, Errno::ENOENT
    # Gzip format errors are reported as a missing file as well.
    raise SiteMaps::FileNotFoundError.new("File not found: #{location.path}")
  end

  # @param [SiteMaps::Adapters::FileSystem::Location] location
  # @return [void]
  # @raise [SiteMaps::FileNotFoundError] if the file does not exist
  def delete(location)
    File.delete(location.path)
  rescue Errno::ENOENT
    raise SiteMaps::FileNotFoundError.new("File not found: #{location.path}")
  end

  protected

  # Writes +data+ gzip-compressed to +stream+ and closes the writer, which
  # also closes +stream+.
  def gzip(stream, data)
    gz = Zlib::GzipWriter.new(stream)
    gz.write data
  ensure
    gz&.close
  end

  # Writes +data+ unchanged to +stream+ and closes it.
  def plain(stream, data)
    stream.write data
  ensure
    stream.close
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SiteMaps::Adapters
  # Adapter that stores sitemaps through the Storage class nested under the
  # adapter class, below +config.directory+.
  class FileSystem < Adapter
    # @param url [String] sitemap URL used to build the Location
    # @param raw_data [String] sitemap content
    def write(url, raw_data, **)
      target = Location.new(config.directory, url)
      storage.write(target, raw_data)
    end

    # @param url [String]
    # @return [Object] whatever the storage returns for the location
    def read(url)
      target = Location.new(config.directory, url)
      storage.read(target)
    end

    # @param url [String]
    def delete(url)
      target = Location.new(config.directory, url)
      storage.delete(target)
    end

    private

    # Lazily built storage; looked up through +self.class+ so a subclass that
    # defines its own Storage constant gets that one.
    def storage
      return @storage if @storage

      @storage = self.class::Storage.new
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SiteMaps::Adapters
  # Adapter whose write, read and delete operations do nothing and return nil.
  class Noop < Adapter
    # Ignores its arguments.
    #
    # @return [nil]
    def write(_url, _raw_data, **_kwargs)
      nil
    end

    # Ignores its argument.
    #
    # @return [nil]
    def read(_url)
      nil
    end

    # Ignores its argument.
    #
    # @return [nil]
    def delete(_url)
      nil
    end

    # @return [Array] always an empty array
    def fetch_sitemap_index_links
      []
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SiteMaps
  # Hands out sitemap URLs, keeping one IncrementalLocation per relative
  # directory, and tracks index links that were loaded up front.
  class AtomicRepository
    attr_reader :main_url, :preloaded_index_links

    # @param main_url [String] passed to every IncrementalLocation built here
    def initialize(main_url)
      @main_url = main_url
      @preloaded_index_links = Concurrent::Array.new
      @generated_urls = Concurrent::Hash.new
    end

    # Returns the URL of the next location for the directory of
    # +raw_location+. The first location seen for a directory is stored and
    # reused for later calls with the same directory.
    #
    # @param raw_location [String]
    # @return [Object] the +url+ of the advanced location
    def generate_url(raw_location)
      candidate = IncrementalLocation.new(main_url, raw_location)
      directory = candidate.relative_directory
      tracked = @generated_urls[directory] || (@generated_urls[directory] = candidate)
      tracked.next.url
    end

    # @return [Array] preloaded links whose +relative_directory+ has not been
    #   used by #generate_url
    def remaining_index_links
      preloaded_index_links.reject { |link| @generated_urls.key?(link.relative_directory) }
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SiteMaps::Builder
  # A URL built from a base URL, a path and optional query parameters.
  # Two links are equal when their string forms are equal.
  class Link
    attr_reader :uri

    # @param base_url [URI, String] copied when already a URI, parsed otherwise
    # @param path [String] replaces the path of the base URL
    # @param params [Hash] when a non-empty Hash, becomes the query string
    def initialize(base_url, path, params = {})
      @uri =
        if base_url.is_a?(::URI)
          base_url.dup
        else
          ::URI.parse(base_url)
        end
      # Drop any user and query carried by the base URL.
      @uri.user = nil
      @uri.query = nil
      @uri.path = path
      return unless params.is_a?(Hash) && params.any?

      @uri.query = Rack::Utils.unescape(Rack::Utils.build_nested_query(params))
    end

    # @return [String] the full URL
    def to_s
      @uri.to_s
    end

    # Compares by string form, so +other+ may be anything responding to to_s.
    def ==(other)
      to_s == other.to_s
    end
    alias_method :eql?, :==

    # Consistent with #eql?: derived from the string form.
    def hash
      to_s.hash
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SiteMaps::Builder
  # Helpers that turn Ruby values into the text written to sitemap XML.
  # `extend self` makes every helper callable directly on the module.
  module Normalizer
    extend self

    # @param value [String, Numeric] strings are returned untouched
    # @return [String] +value+ formatted with one decimal place
    def format_float(value)
      return value if value.is_a?(String)

      "%0.1f" % value
    end

    # @param value [Object] "yes"/"no" in any letter case, or any other value
    # @return [String] the lower-cased literal, otherwise "yes" for a truthy
    #   value and "no" for a falsy one
    def yes_or_no(value)
      literal = value.is_a?(String) && value.match?(/\A(yes|no)\z/i)
      return value.downcase if literal

      value ? "yes" : "no"
    end

    # Same as #yes_or_no, using +default+ when +value+ is nil.
    def yes_or_no_with_default(value, default)
      yes_or_no(value.nil? ? default : value)
    end

    # Formats +date+ as a date/time string.
    #
    # Strings are returned as they are. Objects responding to +iso8601+ use
    # it, with a trailing "Z" rewritten to "+00:00". Otherwise the value is
    # converted to UTC when possible (DateTime, anything responding to +utc+,
    # or an Integer passed to +Time.at+) and printed with a "+00:00" offset; as
    # a last resort +strftime+ is used with the object's own zone offset.
    #
    # @param date [String, #iso8601, #utc, Integer, #strftime]
    # @return [String]
    def w3c_date(date)
      return date if date.is_a?(String)
      return date.iso8601.sub(/Z$/i, "+00:00") if date.respond_to?(:iso8601)
      return date.strftime("%Y-%m-%d") if date.is_a?(Date) && defined?(DateTime) && !date.is_a?(DateTime)

      zulutime =
        if defined?(DateTime) && date.is_a?(DateTime)
          date.new_offset(0)
        elsif date.respond_to?(:utc)
          date.utc
        elsif date.is_a?(Integer)
          Time.at(date).utc
        end
      return zulutime.strftime("%Y-%m-%dT%H:%M:%S+00:00") if zulutime

      # "%z" yields e.g. "+0200"; insert the colon to get "+02:00".
      zone = date.strftime("%z").insert(-3, ":")
      date.strftime("%Y-%m-%dT%H:%M:%S") + zone
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# One <sitemap> entry of a sitemap index: a location plus an optional
# last-modification value. Items are compared and hashed by +loc+ only.
class SiteMaps::Builder::SitemapIndex::Item < Struct.new(:loc, :lastmod)
  extend Forwardable

  # @return [String] the <sitemap> element followed by a newline
  def to_xml
    xml = ::Builder::XmlMarkup.new
    xml.sitemap do
      xml.loc(loc)
      xml.lastmod(w3c_date(lastmod)) if lastmod
    end
    xml << "\n"
  end

  # Equality ignores +lastmod+.
  def ==(other)
    loc == other.loc
  end
  alias_method :eql?, :==

  # Consistent with #eql?: derived from +loc+.
  def hash
    loc.hash
  end

  # Directory part of the URL path of +loc+, without the leading slash.
  #
  # @return [String, nil] nil when +loc+ is not an http(s) URL with a path
  def relative_directory
    return unless loc =~ %r{^https?://[^/]+(/.*)$}

    dir = File.dirname(Regexp.last_match(1))
    dir.start_with?("/") ? dir[1..-1] : dir
  end

  protected

  def_delegator SiteMaps::Builder::Normalizer, :w3c_date
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SiteMaps::Builder
  # Collects sitemap entries in a set and renders them as a <sitemapindex>
  # document.
  class SitemapIndex
    HEADER = <<~HEADER
      <?xml version="1.0" encoding="UTF-8"?>
      <sitemapindex
      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd"
      xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
      >
    HEADER
    FOOTER = "</sitemapindex>"

    attr_reader :sitemaps

    def initialize
      @sitemaps = Concurrent::Set.new
    end

    # Adds an entry to the index.
    #
    # @param loc [Item, Object] an Item is stored as given; anything else is
    #   wrapped in a new Item together with +lastmod+
    # @param lastmod [Object, nil] only used when +loc+ is not an Item
    def add(loc, lastmod: nil)
      entry = loc
      entry = Item.new(loc, lastmod) unless loc.is_a?(Item)
      @sitemaps.add(entry)
    end

    # @return [String] header, one rendered entry per item, then the footer
    def to_xml
      buffer = StringIO.new
      buffer.puts(HEADER)
      @sitemaps.each { |entry| buffer.puts(entry.to_xml) }
      buffer.puts(FOOTER)
      buffer.string
    end

    # @return [Boolean] whether no entry has been added
    def empty?
      @sitemaps.empty?
    end
  end
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SiteMaps::Builder
  # One <url> entry of a sitemap, including the optional news, image, video,
  # alternate-link, mobile and PageMap extensions.
  class URL
    extend Forwardable

    DEFAULTS = {
      changefreq: "weekly",
      priority: 0.5
    }.freeze

    # Maps each collection attribute to the singular option whose value is
    # appended to it.
    COLLECTIONS = {
      alternates: :alternate,
      videos: :video,
      images: :image
    }.freeze

    attr_reader :attributes

    # @param link [Object] value written to <loc>
    # @param attributes [Hash] entry attributes, merged over DEFAULTS. The
    #   singular +:video+, +:alternate+ and +:image+ options are folded into
    #   +:videos+, +:alternates+ and +:images+; images are truncated to
    #   SiteMaps::MAX_LENGTH[:images] entries.
    def initialize(link, **attributes)
      @attributes = DEFAULTS.merge(attributes)
      @attributes[:loc] = link
      COLLECTIONS.each do |plural, singular|
        collected = SiteMaps::Primitives::Array.wrap(@attributes[plural])
        extra = @attributes.delete(singular)
        # `+` builds a new array. The wrapped value may be the caller's own
        # array, so appending in place could modify the caller's data or fail
        # on a frozen array.
        collected += SiteMaps::Primitives::Array.wrap(extra) if extra
        @attributes[plural] = collected
      end
      @attributes[:images] = @attributes[:images][0...SiteMaps::MAX_LENGTH[:images]]
    end

    # @param key [Symbol]
    # @return [Object, nil] the attribute stored under +key+
    def [](key)
      attributes[key]
    end

    # @return [Object, nil] +:lastmod+ when it responds to +strftime+,
    #   otherwise nil
    def last_modified
      return unless self[:lastmod].respond_to?(:strftime)

      self[:lastmod]
    end

    # Renders the entry. The result is built once and memoized.
    #
    # @return [String] the <url> element followed by a newline
    def to_xml
      return @to_xml if defined?(@to_xml)

      builder = ::Builder::XmlMarkup.new
      builder.url do
        builder.loc self[:loc]
        builder.lastmod w3c_date(self[:lastmod]) if self[:lastmod]
        builder.expires w3c_date(self[:expires]) if self[:expires]
        builder.changefreq self[:changefreq].to_s if self[:changefreq]
        builder.priority format_float(self[:priority]) if self[:priority]

        if news?
          news_data = self[:news]
          builder.news :news do
            builder.news :publication do
              builder.news :name, news_data[:publication_name].to_s if news_data[:publication_name]
              builder.news :language, news_data[:publication_language].to_s if news_data[:publication_language]
            end

            builder.news :access, news_data[:access].to_s if news_data[:access]
            builder.news :genres, news_data[:genres].to_s if news_data[:genres]
            builder.news :publication_date, w3c_date(news_data[:publication_date]) if news_data[:publication_date]
            builder.news :title, news_data[:title].to_s if news_data[:title]
            builder.news :keywords, news_data[:keywords].to_s if news_data[:keywords]
            builder.news :stock_tickers, news_data[:stock_tickers].to_s if news_data[:stock_tickers]
          end
        end

        self[:images].each do |image|
          builder.image :image do
            builder.image :loc, image[:loc]
            builder.image :caption, image[:caption].to_s if image[:caption]
            builder.image :geo_location, image[:geo_location].to_s if image[:geo_location]
            builder.image :title, image[:title].to_s if image[:title]
            builder.image :license, image[:license].to_s if image[:license]
          end
        end

        self[:videos].each do |video|
          builder.video :video do
            builder.video :thumbnail_loc, video[:thumbnail_loc].to_s
            builder.video :title, video[:title].to_s
            builder.video :description, video[:description].to_s
            builder.video :content_loc, video[:content_loc].to_s if video[:content_loc]
            if video[:player_loc]
              loc_attributes = {allow_embed: yes_or_no_with_default(video[:allow_embed], true)}
              loc_attributes[:autoplay] = video[:autoplay].to_s if video[:autoplay]
              builder.video :player_loc, video[:player_loc].to_s, loc_attributes
            end
            builder.video :duration, video[:duration].to_s if video[:duration]
            builder.video :expiration_date, w3c_date(video[:expiration_date]) if video[:expiration_date]
            builder.video :rating, format_float(video[:rating]) if video[:rating]
            builder.video :view_count, video[:view_count].to_s if video[:view_count]
            builder.video :publication_date, w3c_date(video[:publication_date]) if video[:publication_date]
            video[:tags]&.each { |tag| builder.video :tag, tag.to_s }
            builder.video :tag, video[:tag].to_s if video[:tag]
            builder.video :category, video[:category].to_s if video[:category]
            builder.video :family_friendly, yes_or_no_with_default(video[:family_friendly], true) if video.has_key?(:family_friendly)
            builder.video :gallery_loc, video[:gallery_loc].to_s, title: video[:gallery_title].to_s if video[:gallery_loc]
            builder.video :price, video[:price].to_s, prepare_video_price_attribs(video) if video[:price]
            if video[:uploader]
              builder.video :uploader, video[:uploader].to_s, video[:uploader_info] ? {info: video[:uploader_info].to_s} : {}
            end
            builder.video :live, yes_or_no_with_default(video[:live], true) if video.has_key?(:live)
            builder.video :requires_subscription, yes_or_no_with_default(video[:requires_subscription], true) if video.has_key?(:requires_subscription)
          end
        end

        self[:alternates].each do |alternate|
          rel = alternate[:nofollow] ? "alternate nofollow" : "alternate"
          # Named link_attributes so it does not shadow the attributes reader.
          link_attributes = {rel: rel, href: alternate[:href].to_s}
          link_attributes[:hreflang] = alternate[:lang].to_s if alternate[:lang]
          link_attributes[:media] = alternate[:media].to_s if alternate[:media]
          builder.xhtml :link, link_attributes
        end

        # Any non-nil value (including false) emits the mobile marker.
        unless self[:mobile].nil?
          builder.mobile :mobile
        end

        if self[:pagemap].is_a?(Hash) && (pagemap = self[:pagemap]).any?
          builder.pagemap :PageMap do
            SiteMaps::Primitives::Array.wrap(pagemap[:dataobjects]).each do |dataobject|
              builder.pagemap :DataObject, type: dataobject[:type].to_s, id: dataobject[:id].to_s do
                SiteMaps::Primitives::Array.wrap(dataobject[:attributes]).each do |attribute|
                  builder.pagemap :Attribute, attribute[:value].to_s, name: attribute[:name].to_s
                end
              end
            end
          end
        end
      end
      @to_xml = builder << "\n"
    end

    # @return [Boolean] whether +:news+ is a non-empty Hash
    def news?
      self[:news].is_a?(Hash) && self[:news].any?
    end

    # @return [Integer] size in bytes of the rendered entry
    def bytesize
      to_xml.bytesize
    end

    private

    # Builds the XML attributes of the <video:price> element from the
    # +:price_currency+, +:price_type+ and +:price_resolution+ video options.
    # The currency is always emitted; type and resolution only when given.
    #
    # @param video [Hash]
    # @return [Hash]
    def prepare_video_price_attribs(video)
      attribs = {currency: video[:price_currency].to_s}
      attribs[:type] = video[:price_type].to_s if video[:price_type]
      attribs[:resolution] = video[:price_resolution].to_s if video[:price_resolution]
      attribs
    end

    def_delegator SiteMaps::Builder::Normalizer, :format_float
    def_delegator SiteMaps::Builder::Normalizer, :yes_or_no
    def_delegator SiteMaps::Builder::Normalizer, :yes_or_no_with_default
    def_delegator SiteMaps::Builder::Normalizer, :w3c_date
  end
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SiteMaps::Builder
  # Accumulates <url> entries into one <urlset> document while enforcing the
  # link-count, news-count and file-size limits.
  class URLSet
    SCHEMAS = {
      "image" => "http://www.google.com/schemas/sitemap-image/1.1",
      "mobile" => "http://www.google.com/schemas/sitemap-mobile/1.0",
      "news" => "http://www.google.com/schemas/sitemap-news/0.9",
      "pagemap" => "http://www.google.com/schemas/sitemap-pagemap/1.0",
      "video" => "http://www.google.com/schemas/sitemap-video/1.1"
    }.freeze

    HEADER = <<~HEADER
      <?xml version="1.0" encoding="UTF-8"?>
      <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xmlns:xhtml="http://www.w3.org/1999/xhtml"
      xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
      #{SCHEMAS.map { |name, uri| " xmlns:#{name}=\"#{uri}\"" }.join("\n")}
      >
    HEADER
    FOOTER = "</urlset>"
    FOOTER_BYTESIZE = FOOTER.bytesize

    attr_reader :content, :links_count, :news_count

    def initialize
      @content = StringIO.new
      @content.puts(HEADER)
      @links_count = 0
      @news_count = 0
      @last_modified = nil
    end

    # Appends a URL entry.
    #
    # @param link [Object] value for the entry's <loc>
    # @param options [Hash] forwarded to SiteMaps::Builder::URL
    # @return [SiteMaps::Builder::URL] the entry that was added
    # @raise [SiteMaps::FullSitemapError] when the set is finalized or the
    #   entry does not fit within the limits
    def add(link, **options)
      raise SiteMaps::FullSitemapError if finalized?

      url = SiteMaps::Builder::URL.new(link, **options)
      raise SiteMaps::FullSitemapError unless fit?(url)

      content.puts(url.to_xml)
      @links_count += 1
      @news_count += 1 if url.news?
      if (lastmod = url.last_modified)
        # Track the most recent modification value seen so far.
        @last_modified = lastmod if @last_modified.nil? || lastmod > @last_modified
      end
      url
    end

    # Appends the footer, freezes the document and closes the buffer.
    #
    # @return [String, nil] the final document, or nil when already finalized
    def finalize!
      return if finalized?

      content.puts(FOOTER)
      @to_xml = content.string.freeze
      content.close
      @to_xml
    end

    # @return [String] the finalized document, or the current content plus
    #   the footer when not finalized yet
    def to_xml
      return content.string + FOOTER unless finalized?

      @to_xml
    end

    # @return [String, nil] truthy once #finalize! has run
    def finalized?
      defined?(@to_xml)
    end

    # @return [Boolean] whether no entry has been added
    def empty?
      links_count.zero?
    end

    # @return [Object] the latest modification value seen, or the current
    #   time when no entry provided one
    def last_modified
      @last_modified || Time.now
    end

    private

    # Size in bytes of what has been written so far.
    def bytesize
      content.string.bytesize
    end

    # @param url [Builder::URL]
    # @return [Boolean] whether +url+ can be added without exceeding a limit
    def fit?(url)
      return false if links_count >= SiteMaps::MAX_LENGTH[:links]
      return false if url.news? && news_count >= SiteMaps::MAX_LENGTH[:news]

      # finalize! writes the footer with puts, which appends a newline, so the
      # closing part of the document is one byte longer than FOOTER itself.
      closing_bytesize = FOOTER_BYTESIZE + 1
      (bytesize + url.bytesize + closing_bytesize) <= SiteMaps::MAX_FILESIZE
    end
  end
end
|