site_maps 0.0.1.beta1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/main.yml +45 -0
  3. data/.gitignore +16 -0
  4. data/.rspec +1 -0
  5. data/.rubocop.yml +36 -0
  6. data/.tool-versions +1 -0
  7. data/CHANGELOG.md +8 -0
  8. data/Gemfile +18 -0
  9. data/Gemfile.lock +134 -0
  10. data/LICENSE.txt +21 -0
  11. data/README.md +186 -0
  12. data/Rakefile +4 -0
  13. data/bin/console +14 -0
  14. data/bin/setup +7 -0
  15. data/exec/site_maps +9 -0
  16. data/lib/site-maps.rb +3 -0
  17. data/lib/site_maps/adapters/adapter.rb +80 -0
  18. data/lib/site_maps/adapters/aws_sdk/config.rb +51 -0
  19. data/lib/site_maps/adapters/aws_sdk/location.rb +9 -0
  20. data/lib/site_maps/adapters/aws_sdk/storage.rb +52 -0
  21. data/lib/site_maps/adapters/aws_sdk.rb +31 -0
  22. data/lib/site_maps/adapters/file_system/config.rb +5 -0
  23. data/lib/site_maps/adapters/file_system/location.rb +35 -0
  24. data/lib/site_maps/adapters/file_system/storage.rb +61 -0
  25. data/lib/site_maps/adapters/file_system.rb +26 -0
  26. data/lib/site_maps/adapters/noop.rb +18 -0
  27. data/lib/site_maps/atomic_repository.rb +24 -0
  28. data/lib/site_maps/builder/link.rb +27 -0
  29. data/lib/site_maps/builder/normalizer.rb +48 -0
  30. data/lib/site_maps/builder/sitemap_index/item.rb +35 -0
  31. data/lib/site_maps/builder/sitemap_index.rb +40 -0
  32. data/lib/site_maps/builder/url.rb +152 -0
  33. data/lib/site_maps/builder/url_set.rb +92 -0
  34. data/lib/site_maps/cli.rb +68 -0
  35. data/lib/site_maps/configuration.rb +119 -0
  36. data/lib/site_maps/incremental_location.rb +62 -0
  37. data/lib/site_maps/notification/bus.rb +90 -0
  38. data/lib/site_maps/notification/event.rb +50 -0
  39. data/lib/site_maps/notification/publisher.rb +78 -0
  40. data/lib/site_maps/notification.rb +36 -0
  41. data/lib/site_maps/primitives/array.rb +15 -0
  42. data/lib/site_maps/primitives/output.rb +66 -0
  43. data/lib/site_maps/primitives/string.rb +43 -0
  44. data/lib/site_maps/process.rb +29 -0
  45. data/lib/site_maps/railtie.rb +18 -0
  46. data/lib/site_maps/runner/event_listener.rb +78 -0
  47. data/lib/site_maps/runner.rb +136 -0
  48. data/lib/site_maps/sitemap_builder.rb +75 -0
  49. data/lib/site_maps/sitemap_reader.rb +56 -0
  50. data/lib/site_maps/version.rb +5 -0
  51. data/lib/site_maps.rb +112 -0
  52. data/site_maps.gemspec +44 -0
  53. metadata +172 -0
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
# Thin wrapper around the S3 bucket exposed by the adapter configuration.
# Uploads local sitemap files and reads/deletes remote ones.
class SiteMaps::Adapters::AwsSdk::Storage
  # S3 user-metadata key under which the caller-supplied modification time is stored.
  LAST_MODIFIED_KEY = "given-last-modified"

  attr_reader :config

  # @param config [Object] adapter configuration; must respond to +acl+,
  #   +cache_control+ and +s3_bucket+
  def initialize(config)
    @config = config
  end

  # Uploads the local file at +location.path+ to +location.remote_path+.
  #
  # @param location [Object] responds to +path+, +remote_path+ and +gzip?+
  # @param options [Hash] forwarded to +upload_file+; +:last_modified+ is
  #   consumed here and stored as object metadata instead
  # @return [Object] whatever +upload_file+ returns
  def upload(location, **options)
    options[:acl] ||= config.acl if config.acl
    options[:cache_control] ||= config.cache_control if config.cache_control
    options[:content_type] ||= location.gzip? ? "application/gzip" : "application/xml"

    lastmod = options.delete(:last_modified) || Time.now
    # Time#utc converts its receiver in place, which would silently change the
    # caller's object; getutc returns a converted copy instead.
    utc_time = lastmod.respond_to?(:getutc) ? lastmod.getutc : lastmod.utc

    # Build a new metadata hash rather than mutating the one the caller passed in.
    options[:metadata] = (options[:metadata] || {}).merge(
      LAST_MODIFIED_KEY => utc_time.strftime("%Y-%m-%dT%H:%M:%S%:z")
    )

    object(location.remote_path).upload_file(location.path, **options)
  end

  # Downloads the object stored at +location.remote_path+.
  #
  # @param location [Object] responds to +remote_path+
  # @return [Array(String, Hash)] the body and a metadata hash with
  #   +:content_type+ and, when a parseable timestamp was stored, +:last_modified+
  # @raise [SiteMaps::FileNotFoundError] if the key does not exist
  def read(location)
    obj = object(location.remote_path).get
    metadata = {content_type: obj.content_type}
    if (time = parse_time(obj.metadata[LAST_MODIFIED_KEY]))
      metadata[:last_modified] = time
    end
    [obj.body.read, metadata]
  rescue Aws::S3::Errors::NoSuchKey
    raise SiteMaps::FileNotFoundError, "File not found: #{location.remote_path}"
  end

  # Deletes the object stored at +location.remote_path+.
  #
  # NOTE(review): S3 delete requests usually succeed for missing keys, so this
  # rescue may never trigger — confirm against the SDK behaviour.
  #
  # @param location [Object] responds to +remote_path+
  # @raise [SiteMaps::FileNotFoundError] if the SDK reports a missing key
  def delete(location)
    object(location.remote_path).delete
  rescue Aws::S3::Errors::NoSuchKey
    raise SiteMaps::FileNotFoundError, "File not found: #{location.remote_path}"
  end

  private

  # Parses a stored timestamp; a missing or malformed value yields nil so that
  # a bad metadata entry does not make the whole read fail.
  def parse_time(raw)
    Time.parse(raw) if raw
  rescue ArgumentError
    nil
  end

  def list_objects(prefix:)
    config.s3_bucket.objects(
      prefix: prefix
    )
  end

  def object(remote_path)
    config.s3_bucket.object(remote_path)
  end
end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SiteMaps::Adapters
4
# Adapter that writes each sitemap to the local directory first and then
# uploads that file to S3; reads and deletes go straight to S3.
class AwsSdk < Adapter
  # @param url [String] public URL of the sitemap file
  # @param raw_data [String] sitemap content
  # @param options [Hash] forwarded to the S3 upload
  def write(url, raw_data, **options)
    target = Location.new(config.directory, url)
    local_storage.write(target, raw_data)
    s3_storage.upload(target, **options)
  end

  # @param url [String] public URL of the sitemap file
  # @return [Array] the value returned by the S3 storage read
  def read(url)
    target = Location.new(config.directory, url)
    s3_storage.read(target)
  end

  # @param url [String] public URL of the sitemap file
  def delete(url)
    target = Location.new(config.directory, url)
    s3_storage.delete(target)
  end

  private

  # Lazily built file-system storage used for the local copy.
  def local_storage
    return @local_storage if @local_storage

    @local_storage = SiteMaps::Adapters::FileSystem::Storage.new
  end

  # Lazily built S3 storage bound to this adapter's configuration.
  def s3_storage
    return @s3_storage if @s3_storage

    @s3_storage = SiteMaps::Adapters::AwsSdk::Storage.new(config)
  end
end
31
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Configuration for the file-system adapter.
class SiteMaps::Adapters::FileSystem::Config < SiteMaps::Configuration
  # Directory the sitemap files are written to.
  attribute(:directory, default: "public/sitemaps")
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
# Maps a sitemap URL onto a path below a local root directory.
class SiteMaps::Adapters::FileSystem::Location < Struct.new(:root, :url)
  ROOT_RE = %r{^/}
  GZIP_RE = %r{\.gz$}

  # @return [String] root joined with the URL path (leading slash removed)
  def path
    relative_file = make_relative(uri.path)
    File.join(root, relative_file)
  end

  # @return [String] root joined with the directory part of the URL path
  def directory
    base = Pathname.new(root)
    base.join(remote_relative_dir).to_s
  end

  # @return [Boolean] whether the URL path ends in ".gz"
  def gzip?
    GZIP_RE.match?(uri.path)
  end

  private

  # Parsed form of +url+, computed once.
  def uri
    return @uri if @uri

    @uri = URI.parse(url)
  end

  # Directory part of the URL path without the leading slash.
  def remote_relative_dir
    dir = File.dirname(uri.path)
    make_relative(dir)
  end

  # Strips a leading "/" so the value can be joined below +root+.
  def make_relative(path)
    path.sub(ROOT_RE, "")
  end
end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
# Reads, writes and deletes sitemap files on the local file system.
class SiteMaps::Adapters::FileSystem::Storage
  # Writes +raw_data+ to +location.path+, gzip-compressing it when the
  # location is a gzip file. The target directory is created when missing.
  #
  # @param [SiteMaps::Adapters::FileSystem::Location] location
  # @param [String] raw_data
  # @return [void]
  # @raise [SiteMaps::Error] if the path is not a directory
  def write(location, raw_data, **)
    dir = location.directory

    if !File.exist?(dir)
      FileUtils.mkdir_p(dir)
    elsif !File.directory?(dir)
      raise SiteMaps::Error, "The path #{dir} is not a directory"
    end

    stream = File.open(location.path, "wb")
    begin
      if location.gzip?
        gzip(stream, raw_data)
      else
        plain(stream, raw_data)
      end
    ensure
      # gzip/plain close the stream on success; this only fires when they
      # raised before closing, so the descriptor is never leaked.
      stream.close unless stream.closed?
    end
  end

  # Reads the file at +location.path+, decompressing gzip files.
  #
  # @param [SiteMaps::Adapters::FileSystem::Location] location
  # @return [Array<String, Hash>] The raw data and metadata
  # @raise [SiteMaps::FileNotFoundError] if the file does not exist or is not
  #   a readable gzip stream
  def read(location)
    if location.gzip?
      # Block form closes the reader (and its file) once the content is read.
      data = Zlib::GzipReader.open(location.path) { |gz| gz.read }
      [data, {content_type: "application/gzip"}]
    else
      [File.read(location.path), {content_type: "application/xml"}]
    end
  rescue Zlib::GzipFile::Error, Errno::ENOENT
    raise SiteMaps::FileNotFoundError, "File not found: #{location.path}"
  end

  # Removes the file at +location.path+.
  #
  # @param [SiteMaps::Adapters::FileSystem::Location] location
  # @return [void]
  # @raise [SiteMaps::FileNotFoundError] if the file does not exist
  def delete(location)
    File.delete(location.path)
  rescue Errno::ENOENT
    raise SiteMaps::FileNotFoundError, "File not found: #{location.path}"
  end

  protected

  # Compresses +data+ into +stream+; closing the writer also closes +stream+.
  def gzip(stream, data)
    gz = Zlib::GzipWriter.new(stream)
    gz.write data
    gz.close
  end

  # Writes +data+ to +stream+ unchanged and closes it.
  def plain(stream, data)
    stream.write data
    stream.close
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SiteMaps::Adapters
4
# Adapter that stores sitemap files below the configured local directory.
class FileSystem < Adapter
  # @param url [String] public URL of the sitemap file
  # @param raw_data [String] sitemap content
  def write(url, raw_data, **)
    target = Location.new(config.directory, url)
    storage.write(target, raw_data)
  end

  # @param url [String] public URL of the sitemap file
  # @return [Array] the value returned by the storage read
  def read(url)
    target = Location.new(config.directory, url)
    storage.read(target)
  end

  # @param url [String] public URL of the sitemap file
  def delete(url)
    target = Location.new(config.directory, url)
    storage.delete(target)
  end

  private

  # Lazily built storage; resolved through +self.class+ so a subclass can
  # supply its own nested +Storage+ constant.
  def storage
    return @storage if @storage

    @storage = self.class::Storage.new
  end
end
26
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SiteMaps::Adapters
4
# Adapter that accepts every call and stores nothing.
class Noop < Adapter
  # Ignores the data; returns nil.
  def write(_url, _raw_data, **_kwargs); end

  # Nothing is stored, so there is nothing to read; returns nil.
  def read(_url); end

  # Nothing is stored, so there is nothing to delete; returns nil.
  def delete(_url); end

  # @return [Array] always empty
  def fetch_sitemap_index_links
    []
  end
end
18
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SiteMaps
4
# Tracks, per relative directory, the location used to hand out sitemap URLs,
# together with the index links that were preloaded.
class AtomicRepository
  attr_reader :main_url, :preloaded_index_links

  # @param main_url [String] URL passed to every IncrementalLocation
  def initialize(main_url)
    @main_url = main_url
    @preloaded_index_links = Concurrent::Array.new
    @generated_urls = Concurrent::Hash.new
  end

  # Returns the next URL for the directory of +raw_location+. The first
  # location seen for a directory is kept and advanced on every call.
  #
  # @param raw_location [String]
  # @return [String]
  def generate_url(raw_location)
    candidate = IncrementalLocation.new(main_url, raw_location)
    tracked = (@generated_urls[candidate.relative_directory] ||= candidate)
    tracked.next.url
  end

  # Preloaded links whose directory has not produced any URL so far.
  #
  # @return [Array]
  def remaining_index_links
    preloaded_index_links.reject { |link| @generated_urls.key?(link.relative_directory) }
  end
end
24
+ end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SiteMaps::Builder
4
# A URL built from a base URL, a path and optional query parameters.
# Two links are equal when their string forms are equal.
class Link
  attr_reader :uri

  # @param base_url [String, URI] base URL; a URI instance is duplicated, not modified
  # @param path [String] path that replaces the base URL's path
  # @param params [Hash] query parameters; ignored unless a non-empty Hash
  def initialize(base_url, path, params = {})
    @uri = base_url.is_a?(::URI) ? base_url.dup : ::URI.parse(base_url)
    # Drop the user and query components of the base URL.
    @uri.user = nil
    @uri.query = nil
    @uri.path = path
    return unless params.is_a?(Hash) && params.any?

    @uri.query = Rack::Utils.unescape(Rack::Utils.build_nested_query(params))
  end

  # @return [String]
  def to_s
    uri.to_s
  end

  # @param other [#to_s]
  # @return [Boolean]
  def eql?(other)
    other.to_s == to_s
  end
  alias_method :==, :eql?

  # @return [Integer] hash of the string form, consistent with +eql?+
  def hash
    to_s.hash
  end
end
27
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SiteMaps::Builder
4
# Stateless helpers that turn Ruby values into the text forms used in sitemap XML.
module Normalizer
  extend self

  # @param value [String, Numeric]
  # @return [String] the string unchanged, or the number with one decimal place
  def format_float(value)
    value.is_a?(String) ? value : ("%0.1f" % value)
  end

  # @param value [Object]
  # @return [String] "yes" or "no"; a "yes"/"no" string (any case) is downcased,
  #   anything else is mapped by truthiness
  def yes_or_no(value)
    if value.is_a?(String) && value.match?(/\A(yes|no)\z/i)
      value.downcase
    else
      value ? "yes" : "no"
    end
  end

  # @param value [Object, nil]
  # @param default [Object] used when +value+ is nil
  # @return [String] "yes" or "no"
  def yes_or_no_with_default(value, default)
    value.nil? ? yes_or_no(default) : yes_or_no(value)
  end

  # Formats a date-like value as a W3C datetime string.
  #
  # @param date [String, Integer, #iso8601, #utc, #strftime] strings are
  #   returned unchanged; integers are treated as epoch seconds
  # @return [String]
  def w3c_date(date)
    if date.is_a?(String)
      date
    elsif date.respond_to?(:iso8601)
      date.iso8601.sub(/Z$/i, "+00:00")
    elsif defined?(Date) && date.is_a?(Date) && defined?(DateTime) && !date.is_a?(DateTime)
      # Date is only referenced once it is known to be loaded; otherwise an
      # Integer input would raise NameError when the date library is absent.
      date.strftime("%Y-%m-%d")
    else
      zulutime = if defined?(DateTime) && date.is_a?(DateTime)
        date.new_offset(0)
      elsif date.respond_to?(:utc)
        date.utc
      elsif date.is_a?(Integer)
        Time.at(date).utc
      end

      if zulutime
        zulutime.strftime("%Y-%m-%dT%H:%M:%S+00:00")
      else
        # "+0200" -> "+02:00"
        zone = date.strftime("%z").insert(-3, ":")
        date.strftime("%Y-%m-%dT%H:%M:%S") + zone
      end
    end
  end
end
48
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
# One <sitemap> entry of a sitemap index. Identity is based on +loc+ only.
class SiteMaps::Builder::SitemapIndex::Item < Struct.new(:loc, :lastmod)
  extend Forwardable

  # @return [String] the <sitemap> element followed by a newline
  def to_xml
    xml = ::Builder::XmlMarkup.new
    xml.sitemap do
      xml.loc(loc)
      xml.lastmod(w3c_date(lastmod)) if lastmod
    end
    xml << "\n"
  end

  # @param other [#loc]
  # @return [Boolean]
  def eql?(other)
    other.loc == loc
  end
  alias_method :==, :eql?

  # @return [Integer] hash of +loc+, consistent with +eql?+
  def hash
    loc.hash
  end

  # Directory part of the URL path without its leading slash.
  #
  # @return [String, nil] nil when +loc+ is not an http(s) URL with a path
  def relative_directory
    return unless loc =~ %r{^https?://[^/]+(/.*)$}

    dir = File.dirname(Regexp.last_match(1))
    dir.start_with?("/") ? dir[1..-1] : dir
  end

  protected

  def_delegator SiteMaps::Builder::Normalizer, :w3c_date
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SiteMaps::Builder
4
# Collects sitemap entries and renders them as a <sitemapindex> document.
class SitemapIndex
  HEADER = <<~HEADER
    <?xml version="1.0" encoding="UTF-8"?>
    <sitemapindex
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd"
    xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
    >
  HEADER
  FOOTER = "</sitemapindex>"

  attr_reader :sitemaps

  def initialize
    @sitemaps = Concurrent::Set.new
  end

  # Adds an entry; an Item is stored as given, anything else is wrapped in one.
  #
  # @param loc [Item, String]
  # @param lastmod [Object, nil] only used when +loc+ is not already an Item
  def add(loc, lastmod: nil)
    entry = loc.is_a?(Item) ? loc : Item.new(loc, lastmod)
    @sitemaps.add(entry)
  end

  # @return [String] header, one line per entry, then the footer line
  def to_xml
    buffer = StringIO.new
    buffer.puts(HEADER)
    @sitemaps.each { |entry| buffer.puts(entry.to_xml) }
    buffer.puts(FOOTER)
    buffer.string
  end

  # @return [Boolean] whether no entry has been added
  def empty?
    @sitemaps.empty?
  end
end
40
+ end
@@ -0,0 +1,152 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SiteMaps::Builder
4
# One <url> entry of a sitemap, with optional news, image, video,
# alternate-link, mobile and PageMap extensions.
class URL
  extend Forwardable

  DEFAULTS = {
    changefreq: "weekly",
    priority: 0.5
  }.freeze

  attr_reader :attributes

  # @param link [Object] value rendered as <loc>
  # @param attributes [Hash] entry attributes merged over DEFAULTS. The
  #   singular +:video+, +:alternate+ and +:image+ keys are appended to their
  #   plural lists; images are capped at SiteMaps::MAX_LENGTH[:images].
  def initialize(link, **attributes)
    @attributes = DEFAULTS.merge(attributes)
    @attributes[:loc] = link
    @attributes[:alternates] = SiteMaps::Primitives::Array.wrap(@attributes[:alternates])
    @attributes[:videos] = SiteMaps::Primitives::Array.wrap(@attributes[:videos])
    @attributes[:images] = SiteMaps::Primitives::Array.wrap(@attributes[:images])
    if (video = @attributes.delete(:video))
      @attributes[:videos].concat(SiteMaps::Primitives::Array.wrap(video))
    end
    if (alternate = @attributes.delete(:alternate))
      @attributes[:alternates].concat(SiteMaps::Primitives::Array.wrap(alternate))
    end
    if (image = @attributes.delete(:image))
      @attributes[:images].concat(SiteMaps::Primitives::Array.wrap(image))
    end
    @attributes[:images] = @attributes[:images][0...SiteMaps::MAX_LENGTH[:images]]
  end

  # @param key [Symbol]
  # @return [Object, nil] the attribute stored under +key+
  def [](key)
    attributes[key]
  end

  # @return [Object, nil] +:lastmod+ when it responds to +strftime+, else nil
  def last_modified
    return unless self[:lastmod].respond_to?(:strftime)

    self[:lastmod]
  end

  # Renders the entry once and memoizes the result.
  #
  # @return [String] the <url> element followed by a newline
  def to_xml
    return @to_xml if defined?(@to_xml)

    builder = ::Builder::XmlMarkup.new
    builder.url do
      builder.loc self[:loc]
      builder.lastmod w3c_date(self[:lastmod]) if self[:lastmod]
      builder.expires w3c_date(self[:expires]) if self[:expires]
      builder.changefreq self[:changefreq].to_s if self[:changefreq]
      builder.priority format_float(self[:priority]) if self[:priority]

      if news?
        news_data = self[:news]
        builder.news :news do
          builder.news :publication do
            builder.news :name, news_data[:publication_name].to_s if news_data[:publication_name]
            builder.news :language, news_data[:publication_language].to_s if news_data[:publication_language]
          end

          builder.news :access, news_data[:access].to_s if news_data[:access]
          builder.news :genres, news_data[:genres].to_s if news_data[:genres]
          builder.news :publication_date, w3c_date(news_data[:publication_date]) if news_data[:publication_date]
          builder.news :title, news_data[:title].to_s if news_data[:title]
          builder.news :keywords, news_data[:keywords].to_s if news_data[:keywords]
          builder.news :stock_tickers, news_data[:stock_tickers].to_s if news_data[:stock_tickers]
        end
      end

      self[:images].each do |image|
        builder.image :image do
          builder.image :loc, image[:loc]
          builder.image :caption, image[:caption].to_s if image[:caption]
          builder.image :geo_location, image[:geo_location].to_s if image[:geo_location]
          builder.image :title, image[:title].to_s if image[:title]
          builder.image :license, image[:license].to_s if image[:license]
        end
      end

      self[:videos].each do |video|
        builder.video :video do
          builder.video :thumbnail_loc, video[:thumbnail_loc].to_s
          builder.video :title, video[:title].to_s
          builder.video :description, video[:description].to_s
          builder.video :content_loc, video[:content_loc].to_s if video[:content_loc]
          if video[:player_loc]
            loc_attributes = {allow_embed: yes_or_no_with_default(video[:allow_embed], true)}
            loc_attributes[:autoplay] = video[:autoplay].to_s if video[:autoplay]
            builder.video :player_loc, video[:player_loc].to_s, loc_attributes
          end
          builder.video :duration, video[:duration].to_s if video[:duration]
          builder.video :expiration_date, w3c_date(video[:expiration_date]) if video[:expiration_date]
          builder.video :rating, format_float(video[:rating]) if video[:rating]
          builder.video :view_count, video[:view_count].to_s if video[:view_count]
          builder.video :publication_date, w3c_date(video[:publication_date]) if video[:publication_date]
          video[:tags]&.each { |tag| builder.video :tag, tag.to_s }
          builder.video :tag, video[:tag].to_s if video[:tag]
          builder.video :category, video[:category].to_s if video[:category]
          builder.video :family_friendly, yes_or_no_with_default(video[:family_friendly], true) if video.key?(:family_friendly)
          builder.video :gallery_loc, video[:gallery_loc].to_s, title: video[:gallery_title].to_s if video[:gallery_loc]
          builder.video :price, video[:price].to_s, prepare_video_price_attribs(video) if video[:price]
          if video[:uploader]
            builder.video :uploader, video[:uploader].to_s, video[:uploader_info] ? {info: video[:uploader_info].to_s} : {}
          end
          builder.video :live, yes_or_no_with_default(video[:live], true) if video.key?(:live)
          builder.video :requires_subscription, yes_or_no_with_default(video[:requires_subscription], true) if video.key?(:requires_subscription)
        end
      end

      self[:alternates].each do |alternate|
        rel = alternate[:nofollow] ? "alternate nofollow" : "alternate"
        # Named link_attributes so it does not shadow the attributes reader.
        link_attributes = {rel: rel, href: alternate[:href].to_s}
        link_attributes[:hreflang] = alternate[:lang].to_s if alternate[:lang]
        link_attributes[:media] = alternate[:media].to_s if alternate[:media]
        builder.xhtml :link, link_attributes
      end

      # NOTE(review): any non-nil value, including false, emits the mobile
      # tag — confirm that `mobile: false` is meant to do so.
      unless self[:mobile].nil?
        builder.mobile :mobile
      end

      if self[:pagemap].is_a?(Hash) && (pagemap = self[:pagemap]).any?
        builder.pagemap :PageMap do
          SiteMaps::Primitives::Array.wrap(pagemap[:dataobjects]).each do |dataobject|
            builder.pagemap :DataObject, type: dataobject[:type].to_s, id: dataobject[:id].to_s do
              SiteMaps::Primitives::Array.wrap(dataobject[:attributes]).each do |attribute|
                builder.pagemap :Attribute, attribute[:value].to_s, name: attribute[:name].to_s
              end
            end
          end
        end
      end
    end
    @to_xml = builder << "\n"
  end

  # @return [Boolean] whether a non-empty +:news+ hash was given
  def news?
    self[:news].is_a?(Hash) && self[:news].any?
  end

  # @return [Integer] size in bytes of the rendered entry
  def bytesize
    to_xml.bytesize
  end

  private

  def_delegator SiteMaps::Builder::Normalizer, :format_float
  def_delegator SiteMaps::Builder::Normalizer, :yes_or_no
  def_delegator SiteMaps::Builder::Normalizer, :yes_or_no_with_default
  def_delegator SiteMaps::Builder::Normalizer, :w3c_date

  # Builds the XML attributes of <video:price>. to_xml calls this helper for
  # every video that has a :price; without it such a video raised NoMethodError.
  #
  # NOTE(review): the :price_currency, :price_type and :price_resolution keys
  # are assumed from the naming of the other video keys — confirm against the
  # gem's documented options.
  #
  # @param video [Hash]
  # @return [Hash] attribute hash for the price element
  def prepare_video_price_attribs(video)
    attribs = {currency: video[:price_currency].to_s}
    attribs[:type] = video[:price_type].to_s if video[:price_type]
    attribs[:resolution] = video[:price_resolution].to_s if video[:price_resolution]
    attribs
  end
end
152
+ end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SiteMaps::Builder
4
# Accumulates <url> entries for one sitemap file and enforces the limits on
# link count, news count and file size.
class URLSet
  SCHEMAS = {
    "image" => "http://www.google.com/schemas/sitemap-image/1.1",
    "mobile" => "http://www.google.com/schemas/sitemap-mobile/1.0",
    "news" => "http://www.google.com/schemas/sitemap-news/0.9",
    "pagemap" => "http://www.google.com/schemas/sitemap-pagemap/1.0",
    "video" => "http://www.google.com/schemas/sitemap-video/1.1"
  }.freeze

  HEADER = <<~HEADER
    <?xml version="1.0" encoding="UTF-8"?>
    <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xmlns:xhtml="http://www.w3.org/1999/xhtml"
    xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
    #{SCHEMAS.map { |name, uri| " xmlns:#{name}=\"#{uri}\"" }.join("\n")}
    >
  HEADER
  FOOTER = "</urlset>"
  FOOTER_BYTESIZE = FOOTER.bytesize

  attr_reader :content, :links_count, :news_count

  def initialize
    @content = StringIO.new
    @content.puts(HEADER)
    @links_count = 0
    @news_count = 0
    @last_modified = nil
  end

  # Appends a URL entry.
  #
  # @param link [Object] passed to SiteMaps::Builder::URL
  # @param options [Hash] passed to SiteMaps::Builder::URL
  # @return [SiteMaps::Builder::URL] the entry that was added
  # @raise [SiteMaps::FullSitemapError] if the set is finalized or the entry
  #   would exceed one of the limits
  def add(link, **options)
    raise SiteMaps::FullSitemapError if finalized?

    url = SiteMaps::Builder::URL.new(link, **options)
    raise SiteMaps::FullSitemapError unless fit?(url)

    content.puts(url.to_xml)
    @links_count += 1
    @news_count += 1 if url.news?
    if (lastmod = url.last_modified)
      # Keep the most recent modification time seen so far.
      @last_modified ||= lastmod
      @last_modified = lastmod if lastmod > @last_modified
    end
    url
  end

  # Writes the footer, freezes the document and closes the buffer.
  #
  # @return [String, nil] the final XML, or nil when already finalized
  def finalize!
    return if finalized?

    content.puts(FOOTER)
    @to_xml = content.string.freeze
    content.close
    @to_xml
  end

  # @return [String] the final XML when finalized, otherwise the current
  #   content with the footer appended
  def to_xml
    return content.string + FOOTER unless finalized?

    @to_xml
  end

  # @return [Boolean] whether +finalize!+ has run
  def finalized?
    instance_variable_defined?(:@to_xml)
  end

  # @return [Boolean] whether no link has been added
  def empty?
    links_count.zero?
  end

  # @return [Object] latest +last_modified+ among added URLs, or the current
  #   time when none of them carried one
  def last_modified
    @last_modified || Time.now
  end

  private

  def bytesize
    content.string.bytesize
  end

  # @param url [Builder::URL]
  # @return [Boolean] whether +url+ can be added without exceeding a limit
  def fit?(url)
    return false if links_count >= SiteMaps::MAX_LENGTH[:links]
    return false if url.news? && news_count >= SiteMaps::MAX_LENGTH[:news]

    # finalize! writes the footer with puts, which appends a newline; count
    # that byte so the finalized file never exceeds MAX_FILESIZE.
    footer_bytes = FOOTER_BYTESIZE + 1
    (bytesize + url.bytesize + footer_bytes) <= SiteMaps::MAX_FILESIZE
  end
end
92
+ end