site_maps 0.0.1.beta3 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/main.yml +2 -4
  3. data/.rubocop.yml +4 -2
  4. data/.tool-versions +1 -1
  5. data/AGENTS.md +73 -0
  6. data/CHANGELOG.md +5 -0
  7. data/CLAUDE.md +77 -0
  8. data/Gemfile +1 -0
  9. data/Gemfile.lock +72 -56
  10. data/README.md +531 -393
  11. data/docs/README.md +67 -0
  12. data/docs/adapters.md +143 -0
  13. data/docs/api.md +154 -0
  14. data/docs/cli.md +93 -0
  15. data/docs/events.md +79 -0
  16. data/docs/extensions.md +141 -0
  17. data/docs/getting-started.md +138 -0
  18. data/docs/middleware.md +85 -0
  19. data/docs/processes.md +156 -0
  20. data/docs/rails.md +128 -0
  21. data/lib/site_maps/adapters/adapter.rb +35 -5
  22. data/lib/site_maps/adapters/aws_sdk/storage.rb +5 -2
  23. data/lib/site_maps/builder/sitemap_index/item.rb +1 -1
  24. data/lib/site_maps/builder/sitemap_index.rb +29 -5
  25. data/lib/site_maps/builder/url.rb +13 -10
  26. data/lib/site_maps/builder/url_set.rb +17 -7
  27. data/lib/site_maps/builder/xsl_stylesheet.rb +192 -0
  28. data/lib/site_maps/cli.rb +6 -2
  29. data/lib/site_maps/configuration.rb +8 -1
  30. data/lib/site_maps/incremental_location.rb +1 -1
  31. data/lib/site_maps/middleware.rb +197 -0
  32. data/lib/site_maps/notification/event.rb +1 -1
  33. data/lib/site_maps/notification/publisher.rb +1 -0
  34. data/lib/site_maps/notification.rb +1 -0
  35. data/lib/site_maps/ping.rb +35 -0
  36. data/lib/site_maps/{primitives → primitive}/array.rb +1 -1
  37. data/lib/site_maps/{primitives → primitive}/output.rb +1 -1
  38. data/lib/site_maps/primitive/string.rb +106 -0
  39. data/lib/site_maps/robots_txt.rb +21 -0
  40. data/lib/site_maps/runner/event_listener.rb +2 -2
  41. data/lib/site_maps/runner.rb +17 -3
  42. data/lib/site_maps/sitemap_builder.rb +16 -4
  43. data/lib/site_maps/sitemap_reader.rb +3 -0
  44. data/lib/site_maps/version.rb +1 -1
  45. data/lib/site_maps.rb +81 -10
  46. data/site_maps.gemspec +1 -1
  47. metadata +23 -10
  48. data/lib/site_maps/primitives/string.rb +0 -43
@@ -0,0 +1,138 @@
1
+ # Getting Started
2
+
3
+ ## Install
4
+
5
+ ```ruby
6
+ # Gemfile
7
+ gem 'site_maps'
8
+ ```
9
+
10
+ ```bash
11
+ bundle install
12
+ ```
13
+
14
+ ## Your first sitemap
15
+
16
+ Create `config/sitemap.rb`:
17
+
18
+ ```ruby
19
+ SiteMaps.use(:file_system) do
20
+ configure do |config|
21
+ config.url = 'https://example.com/sitemap.xml'
22
+ config.directory = File.expand_path('public', __dir__)
23
+ end
24
+
25
+ process do |s|
26
+ s.add('/', priority: 1.0, changefreq: 'daily')
27
+ s.add('/about', priority: 0.8, lastmod: Time.now)
28
+ s.add('/contact', priority: 0.5)
29
+ end
30
+ end
31
+ ```
32
+
33
+ Generate:
34
+
35
+ ```bash
36
+ bundle exec site_maps generate --config-file config/sitemap.rb
37
+ ```
38
+
39
+ Output: `public/sitemap.xml`.
40
+
41
+ ## Dynamic URLs
42
+
43
+ Call `s.add` for every URL you want indexed. Database records work naturally:
44
+
45
+ ```ruby
46
+ process :posts do |s|
47
+ Post.published.find_each do |post|
48
+ s.add("/posts/#{post.slug}", lastmod: post.updated_at, priority: 0.7)
49
+ end
50
+ end
51
+ ```
52
+
53
+ When the URL count of a single process exceeds `max_links` (default 50,000), the file is split into `sitemap1.xml`, `sitemap2.xml`, … and a sitemap index is written at `config.url`.
54
+
55
+ ## Named processes
56
+
57
+ Named processes get their own file and run in parallel:
58
+
59
+ ```ruby
60
+ SiteMaps.use(:file_system) do
61
+ configure { |c| c.url = 'https://example.com/sitemap.xml'; c.directory = 'public' }
62
+
63
+ process :static do |s|
64
+ s.add('/')
65
+ s.add('/about')
66
+ end
67
+
68
+ process :posts, 'posts/sitemap.xml' do |s|
69
+ Post.find_each { |p| s.add("/posts/#{p.slug}") }
70
+ end
71
+
72
+ process :products, 'products/sitemap.xml' do |s|
73
+ Product.find_each { |p| s.add("/products/#{p.id}") }
74
+ end
75
+ end
76
+ ```
77
+
78
+ Run all:
79
+
80
+ ```bash
81
+ bundle exec site_maps generate --config-file config/sitemap.rb --max-threads 4
82
+ ```
83
+
84
+ Run one:
85
+
86
+ ```bash
87
+ bundle exec site_maps generate posts --config-file config/sitemap.rb
88
+ ```
89
+
90
+ See [processes.md](processes.md) for the full process DSL including parameterized templates.
91
+
92
+ ## Using it in Rails
93
+
94
+ Add `site_maps` to your Gemfile and generate from a Rake task, a scheduled job, or your deploy pipeline. The Railtie injects URL helpers:
95
+
96
+ ```ruby
97
+ # config/sitemap.rb
98
+ SiteMaps.use(:file_system) do
99
+ configure do |config|
100
+ config.url = 'https://example.com/sitemap.xml'
101
+ config.directory = Rails.public_path.to_s
102
+ end
103
+
104
+ process do |s|
105
+ s.add(s.route.root_path, priority: 1.0)
106
+ s.add(s.route.about_path)
107
+ Post.find_each { |post| s.add(s.route.post_path(post), lastmod: post.updated_at) }
108
+ end
109
+ end
110
+ ```
111
+
112
+ See [rails.md](rails.md) for the full Rails integration, including asset precompile hooks and the Rack middleware for serving generated sitemaps.
113
+
114
+ ## Uploading to S3
115
+
116
+ Swap the adapter line:
117
+
118
+ ```ruby
119
+ SiteMaps.use(:aws_sdk) do
120
+ configure do |config|
121
+ config.url = 'https://my-bucket.s3.amazonaws.com/sitemap.xml'
122
+ config.bucket = 'my-bucket'
123
+ config.region = ENV['AWS_REGION']
124
+ # access_key_id / secret_access_key default to ENV vars
125
+ end
126
+
127
+ process { |s| ... }
128
+ end
129
+ ```
130
+
131
+ See [adapters.md](adapters.md) for adapter specifics and how to build your own.
132
+
133
+ ## Next steps
134
+
135
+ - [Processes](processes.md) — split your sitemap into static and dynamic shards
136
+ - [SEO extensions](extensions.md) — image, video, news, hreflang
137
+ - [CLI](cli.md) — automation-friendly generate command
138
+ - [Rack middleware](middleware.md) — serve the generated files with correct headers
@@ -0,0 +1,85 @@
1
+ # Rack Middleware
2
+
3
+ `SiteMaps::Middleware` serves generated sitemap files directly from the app. Useful when you've generated to `public/sitemaps/` (filesystem adapter) and want proper `Content-Type`, gzip handling, and XSL stylesheet routing without editing your web-server config.
4
+
5
+ ## Basic usage
6
+
7
+ ```ruby
8
+ # config/application.rb (Rails)
9
+ config.middleware.use SiteMaps::Middleware, adapter: -> { SiteMaps.current_adapter }
10
+ ```
11
+
12
+ Or inline in `config.ru`:
13
+
14
+ ```ruby
15
+ require 'site_maps'
16
+
17
+ use SiteMaps::Middleware, adapter: SiteMaps.current_adapter
18
+ run MyApp
19
+ ```
20
+
21
+ ## Options
22
+
23
+ ```ruby
24
+ use SiteMaps::Middleware,
25
+ adapter: SiteMaps.current_adapter,
26
+ public_prefix: nil,
27
+ storage_prefix: nil,
28
+ x_robots_tag: 'noindex, follow',
29
+ cache_control: 'public, max-age=3600'
30
+ ```
31
+
32
+ | Option | Purpose |
33
+ |--------|---------|
34
+ | `adapter` | Adapter instance (or a callable returning one — useful if the adapter is reconfigured at boot). |
35
+ | `public_prefix` | Strip from request path before lookup — e.g. `/sitemap` if your app mounts the sitemaps under a sub-path. |
36
+ | `storage_prefix` | Prepend to the lookup key — e.g. `tenants/acme` for multi-tenant layouts. |
37
+ | `x_robots_tag` | `X-Robots-Tag` header added to served files. |
38
+ | `cache_control` | `Cache-Control` header. |
39
+
40
+ ## Behavior
41
+
42
+ The middleware intercepts requests for `*.xml` and `*.xml.gz` files:
43
+
44
+ - Matches → serve from the adapter with `Content-Type: application/xml`, plus `X-Robots-Tag` and `Cache-Control`.
45
+ - Gzipped sources → auto-decompress on serve so XSL stylesheets render in the browser. Clients asking for `.xml.gz` still get the compressed bytes.
46
+ - Doesn't match → `env` passes through to `@app.call`.
47
+
48
+ ## XSL stylesheets
49
+
50
+ The middleware also serves the built-in XSL stylesheets — pretty sitemap rendering for human visitors — at their referenced paths. Configure their URLs via:
51
+
52
+ ```ruby
53
+ configure do |config|
54
+ config.xsl_stylesheet_url = '/_sitemap-stylesheet.xsl'
55
+ config.xsl_index_stylesheet_url = '/_sitemap-index-stylesheet.xsl'
56
+ end
57
+ ```
58
+
59
+ ## Multi-tenant routing
60
+
61
+ For per-tenant sitemaps stored under subpaths:
62
+
63
+ ```ruby
64
+ use SiteMaps::Middleware,
65
+ adapter: per_request_adapter,
66
+ storage_prefix: ->(request) { "tenants/#{request.host.split('.').first}" }
67
+ ```
68
+
69
+ If the adapter itself already scopes paths by tenant, no prefix is needed — just point it at the right one for each request.
70
+
71
+ ## robots.txt integration
72
+
73
+ Emit a `Sitemap:` directive for the generated file:
74
+
75
+ ```ruby
76
+ # config.ru or a controller
77
+ SiteMaps::RobotsTxt.sitemap_directive('https://example.com/sitemap.xml')
78
+ # => "Sitemap: https://example.com/sitemap.xml"
79
+
80
+ SiteMaps::RobotsTxt.render(
81
+ sitemap_url: 'https://example.com/sitemap.xml',
82
+ extra_directives: ['Disallow: /admin']
83
+ )
84
+ # => "Sitemap: https://example.com/sitemap.xml\nDisallow: /admin"
85
+ ```
data/docs/processes.md ADDED
@@ -0,0 +1,156 @@
1
+ # Processes
2
+
3
+ A **process** is a unit of work that produces part of a sitemap. Each process runs as its own task on a thread pool, writes its own URL set, and becomes an entry in the sitemap index.
4
+
5
+ ## Static processes
6
+
7
+ A static process has no parameters. It runs once and writes one (possibly split) sitemap file.
8
+
9
+ ```ruby
10
+ SiteMaps.use(:file_system) do
11
+ configure { |c| c.url = 'https://example.com/sitemap.xml'; c.directory = 'public' }
12
+
13
+ process do |s|
14
+ s.add('/', priority: 1.0)
15
+ s.add('/about')
16
+ end
17
+
18
+ process :posts, 'posts/sitemap.xml' do |s|
19
+ Post.find_each { |post| s.add("/posts/#{post.slug}", lastmod: post.updated_at) }
20
+ end
21
+ end
22
+ ```
23
+
24
+ - Without an explicit name, the process is named `:default`.
25
+ - Without an explicit location, a default filename is assigned.
26
+ - The block receives a `SitemapBuilder` (`s`), on which `add` is called per URL.
27
+
28
+ ## Dynamic processes
29
+
30
+ A dynamic process has placeholders in its location template and corresponding kwargs. Each unique combination of kwargs produces a separate sitemap file.
31
+
32
+ ```ruby
33
+ process :monthly_posts, 'posts/%{year}-%{month}/sitemap.xml', year: 2024, month: 1 do |s, year:, month:, **|
34
+ Post.where('extract(year from published_at) = ? AND extract(month from published_at) = ?', year, month)
35
+ .find_each { |p| s.add("/posts/#{p.slug}", lastmod: p.updated_at) }
36
+ end
37
+ ```
38
+
39
+ The kwargs passed to `process` are **defaults**; the real values come from `Runner#enqueue`:
40
+
41
+ ```ruby
42
+ runner = SiteMaps.generate(config_file: 'config/sitemap.rb')
43
+ runner.enqueue(:monthly_posts, year: 2024, month: 1)
44
+ runner.enqueue(:monthly_posts, year: 2024, month: 2)
45
+ runner.enqueue(:monthly_posts, year: 2024, month: 3)
46
+ runner.run
47
+ ```
48
+
49
+ Or from the CLI:
50
+
51
+ ```bash
52
+ bundle exec site_maps generate monthly_posts \
53
+ --config-file config/sitemap.rb \
54
+ --context=year:2024 month:1
55
+ ```
56
+
57
+ ## Execution model
58
+
59
+ When you call `runner.run`:
60
+
61
+ 1. Each enqueued process is wrapped in a `Concurrent::Future`.
62
+ 2. The pool (default 4 threads, configurable via `--max-threads`) runs them in parallel.
63
+ 3. Each process builds a `URLSet`. When the set fills up (50,000 links, 1,000 news items, or 50 MB uncompressed), it's finalized and written, and a new URLSet starts — automatically.
64
+ 4. After every process finishes, the sitemap index is aggregated and written to `config.url`.
65
+
66
+ ## Splitting rules
67
+
68
+ A URL set is finalized and rolled over when **any** of these apply:
69
+
70
+ - Links reach `config.max_links` (default 50,000 — the sitemap spec limit).
71
+ - News entries reach 1,000.
72
+ - Uncompressed XML reaches 50 MB.
73
+
74
+ Split files are named by `IncrementalLocation`: `posts/sitemap.xml` becomes `posts/sitemap1.xml`, `posts/sitemap2.xml`, etc.
75
+
76
+ ## Index generation
77
+
78
+ A sitemap index is produced when:
79
+
80
+ - More than one process exists, or the only process is dynamic,
81
+ - A single process was split across multiple files, or
82
+ - External sitemaps were added.
83
+
84
+ Otherwise a single `urlset` is written directly at `config.url` (the "inline" optimization).
85
+
86
+ ## Adding external sitemaps
87
+
88
+ Reference third-party or pre-existing sitemaps in the index:
89
+
90
+ ```ruby
91
+ SiteMaps.use(:file_system) do
92
+ configure { |c| c.url = 'https://example.com/sitemap.xml'; c.directory = 'public' }
93
+
94
+ external_sitemap('https://cdn.example.com/legacy-sitemap.xml', lastmod: Time.parse('2024-01-15'))
95
+
96
+ process { |s| s.add('/') }
97
+ end
98
+ ```
99
+
100
+ ## Shared helpers across processes
101
+
102
+ Use `extend_processes_with` to add methods that every process block can call:
103
+
104
+ ```ruby
105
+ module Helpers
106
+ def post_path(post) = "/posts/#{post.slug}"
107
+ def published_posts = Post.where.not(published_at: nil)
108
+ end
109
+
110
+ SiteMaps.use(:file_system) do
111
+ configure { |c| c.url = 'https://example.com/sitemap.xml'; c.directory = 'public' }
112
+ extend_processes_with(Helpers)
113
+
114
+ process :posts do |s|
115
+ published_posts.find_each { |p| s.add(post_path(p), lastmod: p.updated_at) }
116
+ end
117
+ end
118
+ ```
119
+
120
+ ## URL filters
121
+
122
+ Filters run per URL inside every process — use them for global exclusions or default attributes:
123
+
124
+ ```ruby
125
+ SiteMaps.use(:file_system) do
126
+ configure { |c| c.url = 'https://example.com/sitemap.xml'; c.directory = 'public' }
127
+
128
+ # Exclude any /admin path
129
+ url_filter { |url, _options| false if url.include?('/admin') }
130
+
131
+ # Boost blog priority
132
+ url_filter do |url, options|
133
+ if url.include?('/blog/')
134
+ options.merge(priority: 0.9, changefreq: 'daily')
135
+ else
136
+ options
137
+ end
138
+ end
139
+
140
+ process { |s| ... }
141
+ end
142
+ ```
143
+
144
+ A filter returning `false` excludes the URL entirely. Returning a hash replaces the options; any other return value (including `nil`) leaves the options unchanged.
145
+
146
+ ## Re-running a single shard
147
+
148
+ Only regenerate what changed — the rest is preserved from the existing sitemap index:
149
+
150
+ ```ruby
151
+ runner = SiteMaps.generate(config_file: 'config/sitemap.rb')
152
+ runner.enqueue(:monthly_posts, year: 2024, month: 3) # only March
153
+ runner.run # Jan and Feb kept as-is
154
+ ```
155
+
156
+ This is the main advantage of parameterized dynamic processes: you can rebuild one month's shard on a cron and leave the rest untouched.
data/docs/rails.md ADDED
@@ -0,0 +1,128 @@
1
+ # Rails Integration
2
+
3
+ The Railtie loads automatically when Rails is present. It wires two things:
4
+
5
+ 1. **URL helpers** — `s.route.<helper>` inside process blocks.
6
+ 2. **No other magic** — no initializer, no autoloaded directories, no patched generators.
7
+
8
+ ## URL helpers in processes
9
+
10
+ ```ruby
11
+ # config/sitemap.rb
12
+ SiteMaps.use(:file_system) do
13
+ configure do |config|
14
+ config.url = 'https://example.com/sitemap.xml'
15
+ config.directory = Rails.public_path.to_s
16
+ end
17
+
18
+ process do |s|
19
+ s.add(s.route.root_path, priority: 1.0)
20
+ s.add(s.route.about_path)
21
+ Post.find_each { |p| s.add(s.route.post_path(p), lastmod: p.updated_at) }
22
+ end
23
+ end
24
+ ```
25
+
26
+ `s.route` is a singleton wrapping `Rails.application.routes.url_helpers`.
27
+
28
+ ## Generating from Rails
29
+
30
+ ### One-off
31
+
32
+ ```bash
33
+ bundle exec site_maps generate --config-file config/sitemap.rb
34
+ ```
35
+
36
+ The CLI auto-requires `config/environment.rb` if it finds a `config/application.rb`, so ActiveRecord, URL helpers, and everything else loads as normal.
37
+
38
+ ### From a Rake task
39
+
40
+ ```ruby
41
+ # lib/tasks/sitemap.rake
42
+ namespace :sitemap do
43
+ desc 'Generate sitemaps'
44
+ task generate: :environment do
45
+ runner = SiteMaps.generate(config_file: Rails.root.join('config/sitemap.rb').to_s)
46
+ runner.enqueue_all.run
47
+ end
48
+ end
49
+ ```
50
+
51
+ Run on deploy or via cron:
52
+
53
+ ```bash
54
+ bundle exec rake sitemap:generate
55
+ ```
56
+
57
+ ### From a scheduled job
58
+
59
+ ```ruby
60
+ class SitemapJob < ApplicationJob
61
+ def perform
62
+ runner = SiteMaps.generate(config_file: Rails.root.join('config/sitemap.rb').to_s)
63
+ runner.enqueue_all.run
64
+ end
65
+ end
66
+
67
+ SitemapJob.set(cron: '0 3 * * *').perform_later
68
+ ```
69
+
70
+ ## Serving generated sitemaps
71
+
72
+ Add the Rack middleware to serve files generated by the `:file_system` adapter:
73
+
74
+ ```ruby
75
+ # config/application.rb
76
+ config.middleware.use SiteMaps::Middleware, adapter: -> { SiteMaps.current_adapter }
77
+ ```
78
+
79
+ See [middleware.md](middleware.md) for options.
80
+
81
+ ## Asset precompile integration
82
+
83
+ If you want sitemaps regenerated on every deploy, hook into `assets:precompile`:
84
+
85
+ ```ruby
86
+ # lib/tasks/sitemap.rake
87
+ Rake::Task['assets:precompile'].enhance(['sitemap:generate'])
88
+ ```
89
+
90
+ ## robots.txt
91
+
92
+ ```erb
93
+ <%# public/robots.txt.erb or app/views/robots.text.erb %>
94
+ User-agent: *
95
+ Disallow: /admin
96
+
97
+ <%= SiteMaps::RobotsTxt.sitemap_directive('https://example.com/sitemap.xml') %>
98
+ ```
99
+
100
+ ## Multi-tenant
101
+
102
+ `SiteMaps.define` gives you a generation function parameterized by runtime context:
103
+
104
+ ```ruby
105
+ # config/sitemap.rb
106
+ SiteMaps.define do |tenant:|
107
+ use(:file_system) do
108
+ configure do |config|
109
+ config.url = "https://#{tenant.domain}/sitemap.xml"
110
+ config.directory = tenant.public_path
111
+ end
112
+
113
+ process { |s| tenant.pages.each { |page| s.add(page.path, lastmod: page.updated_at) } }
114
+ end
115
+ end
116
+ ```
117
+
118
+ ```ruby
119
+ Tenant.find_each do |tenant|
120
+ SiteMaps.generate(config_file: 'config/sitemap.rb', context: { tenant: tenant }).enqueue_all.run
121
+ end
122
+ ```
123
+
124
+ The context hash is splatted into the `define` block as keyword args.
125
+
126
+ ## Dependencies
127
+
128
+ - Rails is **not** listed in the gemspec. The Railtie is loaded only if Rails is already present. If you're using `site_maps` in a non-Rails Ruby project, the Rails-specific pieces are inert.
@@ -13,11 +13,12 @@ module SiteMaps::Adapters
13
13
  end
14
14
 
15
15
  def_delegators :config, :fetch_sitemap_index_links
16
- attr_reader :sitemap_index, :processes
16
+ attr_reader :sitemap_index, :processes, :process_mixins
17
17
 
18
18
  def initialize(**options, &block)
19
19
  @config = SiteMaps.config.becomes(self.class.config_class, **options)
20
20
  @processes = Concurrent::Hash.new
21
+ @process_mixins = Concurrent::Array.new
21
22
  reset!
22
23
  instance_exec(&block) if block
23
24
  end
@@ -60,20 +61,49 @@ module SiteMaps::Adapters
60
61
  @processes[name] = SiteMaps::Process.new(name, location, kwargs, block)
61
62
  end
62
63
 
64
+ def external_sitemap(url, lastmod: nil)
65
+ @external_sitemaps ||= Concurrent::Array.new
66
+ @external_sitemaps << SiteMaps::Builder::SitemapIndex::Item.new(url, lastmod)
67
+ end
68
+
69
+ def external_sitemaps
70
+ @external_sitemaps || []
71
+ end
72
+
63
73
  def maybe_inline_urlset?
64
- @processes.size == 1 && @processes.first.last.static?
74
+ @processes.size == 1 && @processes.first.last.static? && external_sitemaps.empty?
65
75
  end
66
76
 
67
77
  def repo
68
78
  @repo ||= SiteMaps::AtomicRepository.new(config.url)
69
79
  end
70
80
 
71
- def include_module(mod)
72
- extend(mod)
81
+ def extend_processes_with(mod)
82
+ @process_mixins << mod
83
+ end
84
+
85
+ def url_filter(&block)
86
+ if block
87
+ @url_filters ||= Concurrent::Array.new
88
+ @url_filters << block
89
+ end
90
+ @url_filters || []
91
+ end
92
+
93
+ def apply_url_filters(link, options)
94
+ url = link.respond_to?(:to_s) ? link.to_s : link
95
+ url_filter.each do |filter|
96
+ result = filter.call(url, options)
97
+ return nil if result == false
98
+
99
+ options = result if result.is_a?(Hash)
100
+ end
101
+ options
73
102
  end
74
103
 
75
104
  def reset!
76
- @sitemap_index = SiteMaps::Builder::SitemapIndex.new
105
+ xsl_url = config.respond_to?(:xsl_index_stylesheet_url) ? config.xsl_index_stylesheet_url : nil
106
+ @sitemap_index = SiteMaps::Builder::SitemapIndex.new(xsl_url: xsl_url)
77
107
  @repo = nil
78
108
  end
79
109
  end
@@ -14,8 +14,7 @@ class SiteMaps::Adapters::AwsSdk::Storage
14
14
  lastmod = options.delete(:last_modified) || Time.now
15
15
  options[:metadata] ||= {}
16
16
  options[:metadata]["given-last-modified"] = lastmod.utc.strftime("%Y-%m-%dT%H:%M:%S%:z")
17
- obj = object(location.remote_path)
18
- obj.upload_file(location.path, **options)
17
+ transfer_manager.upload_file(location.path, bucket: config.bucket, key: location.remote_path, **options)
19
18
  end
20
19
 
21
20
  def read(location)
@@ -49,4 +48,8 @@ class SiteMaps::Adapters::AwsSdk::Storage
49
48
  def object(remote_path)
50
49
  config.s3_bucket.object(remote_path)
51
50
  end
51
+
52
+ def transfer_manager
53
+ @transfer_manager ||= ::Aws::S3::TransferManager.new(client: config.s3_resource.client)
54
+ end
52
55
  end
@@ -25,7 +25,7 @@ class SiteMaps::Builder::SitemapIndex::Item < Struct.new(:loc, :lastmod)
25
25
  return unless loc =~ %r{^https?://[^/]+(/.*)$}
26
26
 
27
27
  val = File.dirname(Regexp.last_match(1))
28
- val = val[1..-1] if val.start_with?("/")
28
+ val = val[1..] if val.start_with?("/")
29
29
  val
30
30
  end
31
31
 
@@ -2,20 +2,22 @@
2
2
 
3
3
  module SiteMaps::Builder
4
4
  class SitemapIndex
5
- HEADER = <<~HEADER
6
- <?xml version="1.0" encoding="UTF-8"?>
5
+ XML_DECLARATION = %(<?xml version="1.0" encoding="UTF-8"?>)
6
+ SITEMAPINDEX_OPEN = <<~SITEMAPINDEX_OPEN
7
7
  <sitemapindex
8
8
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
9
9
  xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd"
10
10
  xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
11
11
  >
12
- HEADER
12
+ SITEMAPINDEX_OPEN
13
+ HEADER = "#{XML_DECLARATION}\n#{SITEMAPINDEX_OPEN}"
13
14
  FOOTER = "</sitemapindex>"
14
15
 
15
16
  attr_reader :sitemaps
16
17
 
17
- def initialize
18
+ def initialize(xsl_url: nil)
18
19
  @sitemaps = Concurrent::Set.new
20
+ @xsl_url = xsl_url
19
21
  end
20
22
 
21
23
  def add(loc, lastmod: nil)
@@ -25,7 +27,13 @@ module SiteMaps::Builder
25
27
 
26
28
  def to_xml
27
29
  io = StringIO.new
28
- io.puts(HEADER)
30
+ if @xsl_url
31
+ io.puts(XML_DECLARATION)
32
+ io.puts(XSLStylesheet.processing_instruction(@xsl_url))
33
+ io.puts(SITEMAPINDEX_OPEN)
34
+ else
35
+ io.puts(HEADER)
36
+ end
29
37
  @sitemaps.each do |sitemap|
30
38
  io.puts(sitemap.to_xml)
31
39
  end
@@ -33,8 +41,24 @@ module SiteMaps::Builder
33
41
  io.string
34
42
  end
35
43
 
44
+ def last_modified
45
+ dates = @sitemaps.filter_map { |s| parse_lastmod(s.lastmod) }
46
+ dates.max || Time.now
47
+ end
48
+
36
49
  def empty?
37
50
  @sitemaps.empty?
38
51
  end
52
+
53
+ private
54
+
55
+ def parse_lastmod(value)
56
+ case value
57
+ when Time then value
58
+ when String then Time.parse(value)
59
+ end
60
+ rescue ArgumentError
61
+ nil
62
+ end
39
63
  end
40
64
  end