site_maps 0.0.1.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/main.yml +45 -0
  3. data/.gitignore +16 -0
  4. data/.rspec +1 -0
  5. data/.rubocop.yml +36 -0
  6. data/.tool-versions +1 -0
  7. data/CHANGELOG.md +8 -0
  8. data/Gemfile +18 -0
  9. data/Gemfile.lock +134 -0
  10. data/LICENSE.txt +21 -0
  11. data/README.md +186 -0
  12. data/Rakefile +4 -0
  13. data/bin/console +14 -0
  14. data/bin/setup +7 -0
  15. data/exec/site_maps +9 -0
  16. data/lib/site-maps.rb +3 -0
  17. data/lib/site_maps/adapters/adapter.rb +80 -0
  18. data/lib/site_maps/adapters/aws_sdk/config.rb +51 -0
  19. data/lib/site_maps/adapters/aws_sdk/location.rb +9 -0
  20. data/lib/site_maps/adapters/aws_sdk/storage.rb +52 -0
  21. data/lib/site_maps/adapters/aws_sdk.rb +31 -0
  22. data/lib/site_maps/adapters/file_system/config.rb +5 -0
  23. data/lib/site_maps/adapters/file_system/location.rb +35 -0
  24. data/lib/site_maps/adapters/file_system/storage.rb +61 -0
  25. data/lib/site_maps/adapters/file_system.rb +26 -0
  26. data/lib/site_maps/adapters/noop.rb +18 -0
  27. data/lib/site_maps/atomic_repository.rb +24 -0
  28. data/lib/site_maps/builder/link.rb +27 -0
  29. data/lib/site_maps/builder/normalizer.rb +48 -0
  30. data/lib/site_maps/builder/sitemap_index/item.rb +35 -0
  31. data/lib/site_maps/builder/sitemap_index.rb +40 -0
  32. data/lib/site_maps/builder/url.rb +152 -0
  33. data/lib/site_maps/builder/url_set.rb +92 -0
  34. data/lib/site_maps/cli.rb +68 -0
  35. data/lib/site_maps/configuration.rb +119 -0
  36. data/lib/site_maps/incremental_location.rb +62 -0
  37. data/lib/site_maps/notification/bus.rb +90 -0
  38. data/lib/site_maps/notification/event.rb +50 -0
  39. data/lib/site_maps/notification/publisher.rb +78 -0
  40. data/lib/site_maps/notification.rb +36 -0
  41. data/lib/site_maps/primitives/array.rb +15 -0
  42. data/lib/site_maps/primitives/output.rb +66 -0
  43. data/lib/site_maps/primitives/string.rb +43 -0
  44. data/lib/site_maps/process.rb +29 -0
  45. data/lib/site_maps/railtie.rb +18 -0
  46. data/lib/site_maps/runner/event_listener.rb +78 -0
  47. data/lib/site_maps/runner.rb +136 -0
  48. data/lib/site_maps/sitemap_builder.rb +75 -0
  49. data/lib/site_maps/sitemap_reader.rb +56 -0
  50. data/lib/site_maps/version.rb +5 -0
  51. data/lib/site_maps.rb +112 -0
  52. data/site_maps.gemspec +44 -0
  53. metadata +172 -0
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
# Uploads, reads and deletes sitemap files on the S3 bucket exposed by
# +config.s3_bucket+.
class SiteMaps::Adapters::AwsSdk::Storage
  # Object-metadata key holding the modification time supplied by the caller.
  LAST_MODIFIED_KEY = "given-last-modified"

  attr_reader :config

  # @param config [Object] this class reads +acl+, +cache_control+ and
  #   +s3_bucket+ from it
  def initialize(config)
    @config = config
  end

  # Uploads the local file at +location.path+ to +location.remote_path+.
  #
  # @param location [Object] must respond to +path+, +remote_path+ and +gzip?+
  # @param options [Hash] forwarded to +upload_file+. +:last_modified+ is
  #   removed from the options and stored as object metadata instead
  #   (defaults to +Time.now+).
  # @return [Object] whatever +upload_file+ returns
  def upload(location, **options)
    options[:acl] ||= config.acl if config.acl
    options[:cache_control] ||= config.cache_control if config.cache_control
    options[:content_type] ||= location.gzip? ? "application/gzip" : "application/xml"
    lastmod = options.delete(:last_modified) || Time.now
    # Merge into a fresh hash so a :metadata hash owned by the caller is
    # left untouched.
    options[:metadata] = (options[:metadata] || {}).merge(
      LAST_MODIFIED_KEY => to_utc(lastmod).strftime("%Y-%m-%dT%H:%M:%S%:z")
    )
    object(location.remote_path).upload_file(location.path, **options)
  end

  # Fetches the remote object for +location+.
  #
  # @param location [Object] must respond to +remote_path+
  # @return [Array(String, Hash)] the body and a metadata hash with
  #   +:content_type+ and, when a parseable timestamp was stored,
  #   +:last_modified+
  # @raise [SiteMaps::FileNotFoundError] when S3 reports NoSuchKey
  def read(location)
    obj = object(location.remote_path).get
    metadata = {content_type: obj.content_type}
    if (time = parse_time(obj.metadata[LAST_MODIFIED_KEY]))
      metadata[:last_modified] = time
    end
    [obj.body.read, metadata]
  rescue Aws::S3::Errors::NoSuchKey
    raise SiteMaps::FileNotFoundError, "File not found: #{location.remote_path}"
  end

  # Deletes the remote object for +location+.
  #
  # @param location [Object] must respond to +remote_path+
  # @raise [SiteMaps::FileNotFoundError] when S3 reports NoSuchKey
  def delete(location)
    object(location.remote_path).delete
  rescue Aws::S3::Errors::NoSuchKey
    raise SiteMaps::FileNotFoundError, "File not found: #{location.remote_path}"
  end

  private

  # Returns a UTC copy of +time+. Time#utc converts its receiver in place,
  # which would silently change the object the caller handed in, so the
  # non-mutating #getutc is preferred when available.
  def to_utc(time)
    time.respond_to?(:getutc) ? time.getutc : time.utc
  end

  # Parses a stored timestamp; returns nil when it is absent or malformed so
  # that bad metadata does not make the whole read fail.
  def parse_time(raw)
    Time.parse(raw) if raw
  rescue ArgumentError
    nil
  end

  def list_objects(prefix:)
    config.s3_bucket.objects(prefix: prefix)
  end

  def object(remote_path)
    config.s3_bucket.object(remote_path)
  end
end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps::Adapters
  # Adapter that first writes the sitemap through the file-system storage
  # and then uploads that file through the S3 storage. Reads and deletes go
  # to the S3 storage only.
  class AwsSdk < Adapter
    # @param url [String] sitemap URL
    # @param raw_data [String] sitemap content
    # @param options [Hash] forwarded to the S3 storage's +upload+
    def write(url, raw_data, **options)
      target = Location.new(config.directory, url)
      local_storage.write(target, raw_data)
      s3_storage.upload(target, **options)
    end

    # @param url [String] sitemap URL
    # @return [Object] whatever the S3 storage's +read+ returns
    def read(url)
      s3_storage.read(Location.new(config.directory, url))
    end

    # @param url [String] sitemap URL
    def delete(url)
      s3_storage.delete(Location.new(config.directory, url))
    end

    private

    # Lazily built storage used for the local copy.
    def local_storage
      @local_storage ||= begin
        SiteMaps::Adapters::FileSystem::Storage.new
      end
    end

    # Lazily built storage used for the remote copy.
    def s3_storage
      @s3_storage ||= begin
        SiteMaps::Adapters::AwsSdk::Storage.new(config)
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Settings for the file-system adapter.
class SiteMaps::Adapters::FileSystem::Config < SiteMaps::Configuration
  # Target directory; "public/sitemaps" unless overridden.
  attribute(:directory, default: "public/sitemaps")
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
# Maps the path component of a sitemap URL onto a location below +root+.
class SiteMaps::Adapters::FileSystem::Location < Struct.new(:root, :url)
  # Anchored with \A / \z (whole string) rather than ^ / $ (per line).
  ROOT_RE = %r{\A/}
  GZIP_RE = %r{\.gz\z}

  # @return [String] +root+ joined with the URL path made relative
  def path
    File.join(root, make_relative(uri.path))
  end

  # @return [String] +root+ joined with the directory part of the URL path
  def directory
    Pathname.new(root).join(remote_relative_dir).to_s
  end

  # @return [Boolean] true when the URL path ends in ".gz"
  def gzip?
    GZIP_RE.match?(uri.path)
  end

  private

  # Parsed on every call: the struct members are writable, so a memoized
  # URI would go stale as soon as +url+ is reassigned.
  def uri
    URI.parse(url)
  end

  def remote_relative_dir
    make_relative(File.dirname(uri.path))
  end

  # Drops a single leading "/" so the value can be joined below +root+.
  def make_relative(path)
    path.sub(ROOT_RE, "")
  end
end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
# Reads, writes and deletes sitemap files on the local disk.
class SiteMaps::Adapters::FileSystem::Storage
  # Writes +raw_data+ to +location.path+, gzip-compressing it when
  # +location.gzip?+ is true. The target directory is created if missing.
  #
  # @param [SiteMaps::Adapters::FileSystem::Location] location
  # @param [String] raw_data
  # @return [void]
  # @raise [SiteMaps::Error] if the path is not a directory
  def write(location, raw_data, **)
    dir = location.directory

    if !File.exist?(dir)
      FileUtils.mkdir_p(dir)
    elsif !File.directory?(dir)
      raise SiteMaps::Error.new("The path #{dir} is not a directory")
    end

    # Block form guarantees the descriptor is released even when writing
    # raises part-way through.
    File.open(location.path, "wb") do |stream|
      if location.gzip?
        gzip(stream, raw_data)
      else
        plain(stream, raw_data)
      end
    end
    nil
  end

  # @param [SiteMaps::Adapters::FileSystem::Location] location
  # @return [Array<String, Hash>] The raw data and metadata
  # @raise [SiteMaps::FileNotFoundError] if the file does not exist; a gzip
  #   error while reading is reported with the same exception
  def read(location)
    if location.gzip?
      # Block form closes the reader (and its file) once the data is read.
      [Zlib::GzipReader.open(location.path, &:read), {content_type: "application/gzip"}]
    else
      [File.read(location.path), {content_type: "application/xml"}]
    end
  rescue Zlib::GzipFile::Error, Errno::ENOENT
    raise SiteMaps::FileNotFoundError.new("File not found: #{location.path}")
  end

  # @param [SiteMaps::Adapters::FileSystem::Location] location
  # @return [void]
  # @raise [SiteMaps::FileNotFoundError] if the file does not exist
  def delete(location)
    File.delete(location.path)
  rescue Errno::ENOENT
    raise SiteMaps::FileNotFoundError.new("File not found: #{location.path}")
  end

  protected

  # Writes +data+ gzip-compressed to +stream+; the writer and the
  # underlying stream are closed afterwards, also on failure.
  def gzip(stream, data)
    Zlib::GzipWriter.wrap(stream) { |gz| gz.write(data) }
  end

  # Writes +data+ verbatim to +stream+ and closes it, also on failure.
  def plain(stream, data)
    stream.write(data)
  ensure
    stream.close
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps::Adapters
  # Adapter that delegates every operation to a Storage instance, addressing
  # files through a Location built from +config.directory+ and the URL.
  class FileSystem < Adapter
    # @param url [String] sitemap URL
    # @param raw_data [String] sitemap content
    def write(url, raw_data, **)
      storage.write(Location.new(config.directory, url), raw_data)
    end

    # @param url [String] sitemap URL
    # @return [Object] whatever the storage's +read+ returns
    def read(url)
      storage.read(Location.new(config.directory, url))
    end

    # @param url [String] sitemap URL
    def delete(url)
      storage.delete(Location.new(config.directory, url))
    end

    private

    # Lazily built; resolved through +self.class+ so the Storage constant of
    # the receiving class is used.
    def storage
      @storage ||= begin
        self.class::Storage.new
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps::Adapters
  # Adapter whose operations do nothing and return nil; the list of sitemap
  # index links is always empty.
  class Noop < Adapter
    # Ignores its arguments.
    def write(_url, _raw_data, **_kwargs)
      nil
    end

    # Ignores its argument.
    def read(_url)
      nil
    end

    # Ignores its argument.
    def delete(_url)
      nil
    end

    # @return [Array] always empty
    def fetch_sitemap_index_links
      Array.new
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps
  # Hands out sitemap URLs, keeping one IncrementalLocation per relative
  # directory, and tracks which preloaded index links have not been
  # regenerated yet.
  class AtomicRepository
    attr_reader :main_url, :preloaded_index_links

    # @param main_url [String] passed to every IncrementalLocation
    def initialize(main_url)
      @main_url = main_url
      @preloaded_index_links = Concurrent::Array.new
      @generated_urls = Concurrent::Hash.new
      @generate_lock = Mutex.new
    end

    # Returns the next URL for the directory +raw_location+ belongs to.
    #
    # The lookup-or-store on the hash and the following +next+ form one
    # check-then-act sequence; it is serialized so two threads cannot both
    # register a location for the same directory or interleave the advance.
    #
    # @param raw_location [Object] passed to IncrementalLocation.new
    # @return [Object] the +url+ of the advanced location
    def generate_url(raw_location)
      candidate = IncrementalLocation.new(main_url, raw_location)
      @generate_lock.synchronize do
        (@generated_urls[candidate.relative_directory] ||= candidate).next.url
      end
    end

    # @return [Array] preloaded links whose +relative_directory+ has no
    #   generated URL so far
    def remaining_index_links
      preloaded_index_links.reject do |link|
        @generated_urls.key?(link.relative_directory)
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps::Builder
  # A URI assembled from a base URL, a path and optional query parameters.
  # Two links are equal when their string forms are equal.
  class Link
    attr_reader :uri

    # @param base_url [String, URI] parsed when it is not already a URI;
    #   a URI argument is duplicated, not modified
    # @param path [String] replaces the path of the base URL
    # @param params [Hash] when a non-empty Hash, becomes the query string
    def initialize(base_url, path, params = {})
      @uri =
        if base_url.is_a?(::URI)
          base_url.dup
        else
          ::URI.parse(base_url)
        end
      # Drop the user and any query carried by the base URL.
      @uri.user = nil
      @uri.query = nil
      @uri.path = path
      return unless params.is_a?(Hash) && params.any?

      nested_query = Rack::Utils.build_nested_query(params)
      @uri.query = Rack::Utils.unescape(nested_query)
    end

    # @return [String] the string form of the URI
    def to_s
      uri.to_s
    end

    # @param other [#to_s]
    # @return [Boolean] true when both string forms match
    def eql?(other)
      own = to_s
      own == other.to_s
    end
    alias_method :==, :eql?

    # @return [Integer] hash of the string form, consistent with #eql?
    def hash
      to_s.hash
    end
  end
end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps
  module Builder
    # Stateless helpers that turn Ruby values into the strings written to
    # sitemap XML. Usable as module functions (+extend self+).
    module Normalizer
      extend self

      # @param value [String, Numeric]
      # @return [String] strings unchanged, numbers with one decimal place
      def format_float(value)
        value.is_a?(String) ? value : ("%0.1f" % value)
      end

      # @param value [Object]
      # @return [String] "yes"/"no"; a String spelling either word (any case)
      #   is downcased, anything else is mapped by truthiness
      def yes_or_no(value)
        if value.is_a?(String) && value.match?(/\A(yes|no)\z/i)
          value.downcase
        else
          value ? "yes" : "no"
        end
      end

      # Like #yes_or_no, using +default+ when +value+ is nil.
      def yes_or_no_with_default(value, default)
        value.nil? ? yes_or_no(default) : yes_or_no(value)
      end

      # Formats +date+ as a date or timestamp string.
      #
      # @param date [String, #iso8601, #strftime, Integer] strings are
      #   returned unchanged; an Integer is treated as seconds since the epoch
      # @return [String]
      def w3c_date(date)
        return date if date.is_a?(String)
        return date.iso8601.sub(/Z\z/i, "+00:00") if date.respond_to?(:iso8601)
        return date.strftime("%Y-%m-%d") if date_without_time?(date)

        zulutime = utc_time(date)
        return zulutime.strftime("%Y-%m-%dT%H:%M:%S+00:00") if zulutime

        zone = date.strftime("%z").insert(-3, ":")
        date.strftime("%Y-%m-%dT%H:%M:%S") + zone
      end

      private

      # True for a Date that is not a DateTime. The constants are checked
      # with defined? first because they only exist once the "date" library
      # has been loaded.
      def date_without_time?(date)
        return false unless defined?(::Date) && defined?(::DateTime)

        date.is_a?(::Date) && !date.is_a?(::DateTime)
      end

      # UTC version of +date+, or nil when it cannot be converted. #getutc is
      # preferred over #utc because Time#utc converts its receiver in place,
      # which would change the object passed in by the caller.
      def utc_time(date)
        if defined?(::DateTime) && date.is_a?(::DateTime)
          date.new_offset(0)
        elsif date.respond_to?(:getutc)
          date.getutc
        elsif date.respond_to?(:utc)
          date.utc
        elsif date.is_a?(Integer)
          ::Time.at(date).utc
        end
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
# One <sitemap> entry of a sitemap index. Identity (==, eql?, hash) is based
# on +loc+ only; +lastmod+ is ignored.
class SiteMaps::Builder::SitemapIndex::Item < Struct.new(:loc, :lastmod)
  extend Forwardable

  # @return [String] the <sitemap> element followed by a newline; <lastmod>
  #   is emitted only when +lastmod+ is set
  def to_xml
    builder = ::Builder::XmlMarkup.new
    builder.sitemap do
      builder.loc(loc)
      builder.lastmod w3c_date(lastmod) if lastmod
    end
    builder << "\n"
  end

  # @param other [Object]
  # @return [Boolean] true when +other+ exposes the same +loc+; objects
  #   without a +loc+ are simply unequal instead of raising NoMethodError
  def eql?(other)
    other.respond_to?(:loc) && loc == other.loc
  end
  alias_method :==, :eql?

  # @return [Integer] hash of +loc+, consistent with #eql?
  def hash
    loc.hash
  end

  # Directory part of the URL path without its leading slash, e.g.
  # "https://example.com/a/b/sitemap.xml" gives "a/b".
  #
  # @return [String, nil] nil when +loc+ is not an http(s) URL with a path
  def relative_directory
    return unless loc =~ %r{^https?://[^/]+(/.*)$}

    dir = File.dirname(Regexp.last_match(1))
    dir = dir[1..-1] if dir.start_with?("/")
    dir
  end

  protected

  # NOTE(review): methods generated by def_delegator may not pick up the bare
  # `protected` above — confirm the intended visibility of #w3c_date.
  def_delegator SiteMaps::Builder::Normalizer, :w3c_date
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps::Builder
  # Collects sitemap entries in a set and renders them as a <sitemapindex>
  # document.
  class SitemapIndex
    HEADER = <<~HEADER
      <?xml version="1.0" encoding="UTF-8"?>
      <sitemapindex
      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd"
      xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
      >
    HEADER
    FOOTER = "</sitemapindex>"

    attr_reader :sitemaps

    def initialize
      @sitemaps = Concurrent::Set.new
    end

    # Registers an entry.
    #
    # @param loc [Item, Object] an Item is stored as given; anything else is
    #   wrapped in a new Item together with +lastmod+
    # @param lastmod [Object, nil] only used when +loc+ is not an Item
    # @return [Object] whatever the set's +add+ returns
    def add(loc, lastmod: nil)
      entry = loc
      entry = Item.new(loc, lastmod) unless loc.is_a?(Item)
      @sitemaps.add(entry)
    end

    # @return [String] header, every entry's XML, then the footer
    def to_xml
      buffer = StringIO.new
      buffer.puts(HEADER)
      @sitemaps.each { |entry| buffer.puts(entry.to_xml) }
      buffer.puts(FOOTER)
      buffer.string
    end

    # @return [Boolean] true when no entry has been added
    def empty?
      @sitemaps.empty?
    end
  end
end
@@ -0,0 +1,152 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps::Builder
  # A single <url> entry of a sitemap, including the optional news, image,
  # video, alternate-link, mobile and pagemap children.
  class URL
    extend Forwardable

    DEFAULTS = {
      changefreq: "weekly",
      priority: 0.5
    }.freeze

    # Values accepted (case-insensitively) for the <video:price> attributes.
    # NOTE(review): taken from Google's video sitemap documentation — confirm.
    VIDEO_PRICE_TYPES = %w[rent own].freeze
    VIDEO_PRICE_RESOLUTIONS = %w[hd sd].freeze

    attr_reader :attributes

    # @param link [Object] stored as +:loc+
    # @param attributes [Hash] merged over DEFAULTS. The singular keys
    #   +:video+, +:alternate+ and +:image+ are folded into +:videos+,
    #   +:alternates+ and +:images+; images are capped at
    #   +SiteMaps::MAX_LENGTH[:images]+.
    def initialize(link, **attributes)
      @attributes = DEFAULTS.merge(attributes)
      @attributes[:loc] = link
      @attributes[:alternates] = SiteMaps::Primitives::Array.wrap(@attributes[:alternates])
      @attributes[:videos] = SiteMaps::Primitives::Array.wrap(@attributes[:videos])
      @attributes[:images] = SiteMaps::Primitives::Array.wrap(@attributes[:images])
      # `+=` builds a new array; appending in place could modify an array
      # object that was passed in by the caller.
      if (video = @attributes.delete(:video))
        @attributes[:videos] += SiteMaps::Primitives::Array.wrap(video)
      end
      if (alternate = @attributes.delete(:alternate))
        @attributes[:alternates] += SiteMaps::Primitives::Array.wrap(alternate)
      end
      if (image = @attributes.delete(:image))
        @attributes[:images] += SiteMaps::Primitives::Array.wrap(image)
      end
      @attributes[:images] = @attributes[:images][0...SiteMaps::MAX_LENGTH[:images]]
    end

    # @param key [Symbol]
    # @return [Object, nil] the attribute stored under +key+
    def [](key)
      attributes[key]
    end

    # @return [Object, nil] +:lastmod+ when it responds to +strftime+
    def last_modified
      return unless self[:lastmod].respond_to?(:strftime)

      self[:lastmod]
    end

    # Renders the entry; the result is built once and memoized.
    #
    # @return [String] the <url> element followed by a newline
    def to_xml
      return @to_xml if defined?(@to_xml)

      builder = ::Builder::XmlMarkup.new
      builder.url do
        builder.loc self[:loc]
        builder.lastmod w3c_date(self[:lastmod]) if self[:lastmod]
        builder.expires w3c_date(self[:expires]) if self[:expires]
        builder.changefreq self[:changefreq].to_s if self[:changefreq]
        builder.priority format_float(self[:priority]) if self[:priority]

        append_news(builder) if news?
        self[:images].each { |image| append_image(builder, image) }
        self[:videos].each { |video| append_video(builder, video) }
        self[:alternates].each { |alternate| append_alternate(builder, alternate) }

        # NOTE(review): any non-nil value, including false, emits the tag.
        unless self[:mobile].nil?
          builder.mobile :mobile
        end

        append_pagemap(builder)
      end
      @to_xml = builder << "\n"
    end

    # @return [Boolean] true when +:news+ is a non-empty Hash
    def news?
      self[:news].is_a?(Hash) && self[:news].any?
    end

    # @return [Integer] size in bytes of the rendered XML
    def bytesize
      to_xml.bytesize
    end

    private

    # Emits the <news:news> element from the +:news+ hash.
    def append_news(builder)
      news_data = self[:news]
      builder.news :news do
        builder.news :publication do
          builder.news :name, news_data[:publication_name].to_s if news_data[:publication_name]
          builder.news :language, news_data[:publication_language].to_s if news_data[:publication_language]
        end

        builder.news :access, news_data[:access].to_s if news_data[:access]
        builder.news :genres, news_data[:genres].to_s if news_data[:genres]
        builder.news :publication_date, w3c_date(news_data[:publication_date]) if news_data[:publication_date]
        builder.news :title, news_data[:title].to_s if news_data[:title]
        builder.news :keywords, news_data[:keywords].to_s if news_data[:keywords]
        builder.news :stock_tickers, news_data[:stock_tickers].to_s if news_data[:stock_tickers]
      end
    end

    # Emits one <image:image> element.
    def append_image(builder, image)
      builder.image :image do
        builder.image :loc, image[:loc]
        builder.image :caption, image[:caption].to_s if image[:caption]
        builder.image :geo_location, image[:geo_location].to_s if image[:geo_location]
        builder.image :title, image[:title].to_s if image[:title]
        builder.image :license, image[:license].to_s if image[:license]
      end
    end

    # Emits one <video:video> element.
    def append_video(builder, video)
      builder.video :video do
        builder.video :thumbnail_loc, video[:thumbnail_loc].to_s
        builder.video :title, video[:title].to_s
        builder.video :description, video[:description].to_s
        builder.video :content_loc, video[:content_loc].to_s if video[:content_loc]
        if video[:player_loc]
          loc_attributes = {allow_embed: yes_or_no_with_default(video[:allow_embed], true)}
          loc_attributes[:autoplay] = video[:autoplay].to_s if video[:autoplay]
          builder.video :player_loc, video[:player_loc].to_s, loc_attributes
        end
        builder.video :duration, video[:duration].to_s if video[:duration]
        builder.video :expiration_date, w3c_date(video[:expiration_date]) if video[:expiration_date]
        builder.video :rating, format_float(video[:rating]) if video[:rating]
        builder.video :view_count, video[:view_count].to_s if video[:view_count]
        builder.video :publication_date, w3c_date(video[:publication_date]) if video[:publication_date]
        video[:tags]&.each { |tag| builder.video :tag, tag.to_s }
        builder.video :tag, video[:tag].to_s if video[:tag]
        builder.video :category, video[:category].to_s if video[:category]
        builder.video :family_friendly, yes_or_no_with_default(video[:family_friendly], true) if video.has_key?(:family_friendly)
        builder.video :gallery_loc, video[:gallery_loc].to_s, title: video[:gallery_title].to_s if video[:gallery_loc]
        builder.video :price, video[:price].to_s, prepare_video_price_attribs(video) if video[:price]
        if video[:uploader]
          builder.video :uploader, video[:uploader].to_s, video[:uploader_info] ? {info: video[:uploader_info].to_s} : {}
        end
        builder.video :live, yes_or_no_with_default(video[:live], true) if video.has_key?(:live)
        builder.video :requires_subscription, yes_or_no_with_default(video[:requires_subscription], true) if video.has_key?(:requires_subscription)
      end
    end

    # Builds the attribute hash for <video:price>. +currency+ is always
    # present; +type+ and +resolution+ are added only for recognised values.
    def prepare_video_price_attribs(video)
      attribs = {currency: video[:price_currency].to_s}
      if VIDEO_PRICE_TYPES.include?(video[:price_type].to_s.downcase)
        attribs[:type] = video[:price_type].to_s
      end
      if VIDEO_PRICE_RESOLUTIONS.include?(video[:price_resolution].to_s.downcase)
        attribs[:resolution] = video[:price_resolution].to_s
      end
      attribs
    end

    # Emits one <xhtml:link rel="alternate"> element.
    def append_alternate(builder, alternate)
      rel = alternate[:nofollow] ? "alternate nofollow" : "alternate"
      link_attrs = {rel: rel, href: alternate[:href].to_s}
      link_attrs[:hreflang] = alternate[:lang].to_s if alternate[:lang]
      link_attrs[:media] = alternate[:media].to_s if alternate[:media]
      builder.xhtml :link, link_attrs
    end

    # Emits the <pagemap:PageMap> element when +:pagemap+ is a non-empty Hash.
    def append_pagemap(builder)
      return unless self[:pagemap].is_a?(Hash) && (pagemap = self[:pagemap]).any?

      builder.pagemap :PageMap do
        SiteMaps::Primitives::Array.wrap(pagemap[:dataobjects]).each do |dataobject|
          builder.pagemap :DataObject, type: dataobject[:type].to_s, id: dataobject[:id].to_s do
            SiteMaps::Primitives::Array.wrap(dataobject[:attributes]).each do |attribute|
              builder.pagemap :Attribute, attribute[:value].to_s, name: attribute[:name].to_s
            end
          end
        end
      end
    end

    def_delegator SiteMaps::Builder::Normalizer, :format_float
    def_delegator SiteMaps::Builder::Normalizer, :yes_or_no
    def_delegator SiteMaps::Builder::Normalizer, :yes_or_no_with_default
    def_delegator SiteMaps::Builder::Normalizer, :w3c_date
  end
end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps
  module Builder
    # Accumulates <url> entries into one <urlset> document and refuses new
    # entries once the link, news or byte-size limits would be exceeded.
    class URLSet
      SCHEMAS = {
        "image" => "http://www.google.com/schemas/sitemap-image/1.1",
        "mobile" => "http://www.google.com/schemas/sitemap-mobile/1.0",
        "news" => "http://www.google.com/schemas/sitemap-news/0.9",
        "pagemap" => "http://www.google.com/schemas/sitemap-pagemap/1.0",
        "video" => "http://www.google.com/schemas/sitemap-video/1.1"
      }.freeze

      HEADER = <<~HEADER
        <?xml version="1.0" encoding="UTF-8"?>
        <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xmlns:xhtml="http://www.w3.org/1999/xhtml"
        xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
        #{SCHEMAS.map { |name, uri| " xmlns:#{name}=\"#{uri}\"" }.join("\n")}
        >
      HEADER
      FOOTER = "</urlset>"
      FOOTER_BYTESIZE = FOOTER.bytesize

      attr_reader :content, :links_count, :news_count

      def initialize
        @content = ::StringIO.new
        @content.puts(HEADER)
        @links_count = 0
        @news_count = 0
        @last_modified = nil
      end

      # Appends an entry.
      #
      # @param link [Object] passed to SiteMaps::Builder::URL.new
      # @param options [Hash] passed to SiteMaps::Builder::URL.new
      # @return [SiteMaps::Builder::URL] the entry that was added
      # @raise [SiteMaps::FullSitemapError] when the set is finalized or the
      #   entry does not fit
      def add(link, **options)
        raise SiteMaps::FullSitemapError if finalized?

        url = SiteMaps::Builder::URL.new(link, **options)
        raise SiteMaps::FullSitemapError unless fit?(url)

        content.puts(url.to_xml)
        @links_count += 1
        @news_count += 1 if url.news?
        if (lastmod = url.last_modified)
          # Keep the most recent modification time seen so far.
          @last_modified = lastmod if @last_modified.nil? || lastmod > @last_modified
        end
        url
      end

      # Writes the footer, freezes the document and closes the buffer.
      #
      # @return [String, nil] the frozen XML, or nil when already finalized
      def finalize!
        return if finalized?

        content.puts(FOOTER)
        @to_xml = content.string.freeze
        content.close
        @to_xml
      end

      # @return [String] the finalized XML, or the current content plus the
      #   footer when not finalized yet
      def to_xml
        return content.string + FOOTER unless finalized?

        @to_xml
      end

      # @return [String, nil] truthy once #finalize! has run
      def finalized?
        defined?(@to_xml)
      end

      # @return [Boolean] true when no entry has been added
      def empty?
        links_count.zero?
      end

      # @return [Object] latest +last_modified+ among the added entries, or
      #   the current time when none provided one
      def last_modified
        @last_modified || ::Time.now
      end

      private

      def bytesize
        content.string.bytesize
      end

      # @param url [Builder::URL]
      # @return [Boolean] whether +url+ can be added without breaking a limit
      def fit?(url)
        return false if links_count >= SiteMaps::MAX_LENGTH[:links]
        return false if url.news? && news_count >= SiteMaps::MAX_LENGTH[:news]

        # +1 accounts for the newline #finalize! writes after the footer;
        # without it the finalized document could be one byte over the limit.
        (bytesize + url.bytesize + FOOTER_BYTESIZE + 1) <= SiteMaps::MAX_FILESIZE
      end
    end
  end
end