site_maps 0.0.1.beta3 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +2 -4
- data/.rubocop.yml +4 -2
- data/.tool-versions +1 -1
- data/AGENTS.md +73 -0
- data/CHANGELOG.md +5 -0
- data/CLAUDE.md +77 -0
- data/Gemfile +1 -0
- data/Gemfile.lock +72 -56
- data/README.md +531 -393
- data/docs/README.md +67 -0
- data/docs/adapters.md +143 -0
- data/docs/api.md +154 -0
- data/docs/cli.md +93 -0
- data/docs/events.md +79 -0
- data/docs/extensions.md +141 -0
- data/docs/getting-started.md +138 -0
- data/docs/middleware.md +85 -0
- data/docs/processes.md +156 -0
- data/docs/rails.md +128 -0
- data/lib/site_maps/adapters/adapter.rb +35 -5
- data/lib/site_maps/adapters/aws_sdk/storage.rb +5 -2
- data/lib/site_maps/builder/sitemap_index/item.rb +1 -1
- data/lib/site_maps/builder/sitemap_index.rb +29 -5
- data/lib/site_maps/builder/url.rb +13 -10
- data/lib/site_maps/builder/url_set.rb +17 -7
- data/lib/site_maps/builder/xsl_stylesheet.rb +192 -0
- data/lib/site_maps/cli.rb +6 -2
- data/lib/site_maps/configuration.rb +8 -1
- data/lib/site_maps/incremental_location.rb +1 -1
- data/lib/site_maps/middleware.rb +197 -0
- data/lib/site_maps/notification/event.rb +1 -1
- data/lib/site_maps/notification/publisher.rb +1 -0
- data/lib/site_maps/notification.rb +1 -0
- data/lib/site_maps/ping.rb +35 -0
- data/lib/site_maps/{primitives → primitive}/array.rb +1 -1
- data/lib/site_maps/{primitives → primitive}/output.rb +1 -1
- data/lib/site_maps/primitive/string.rb +106 -0
- data/lib/site_maps/robots_txt.rb +21 -0
- data/lib/site_maps/runner/event_listener.rb +2 -2
- data/lib/site_maps/runner.rb +17 -3
- data/lib/site_maps/sitemap_builder.rb +16 -4
- data/lib/site_maps/sitemap_reader.rb +3 -0
- data/lib/site_maps/version.rb +1 -1
- data/lib/site_maps.rb +81 -10
- data/site_maps.gemspec +1 -1
- metadata +23 -10
- data/lib/site_maps/primitives/string.rb +0 -43
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# Getting Started
|
|
2
|
+
|
|
3
|
+
## Install
|
|
4
|
+
|
|
5
|
+
```ruby
|
|
6
|
+
# Gemfile
|
|
7
|
+
gem 'site_maps'
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
bundle install
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Your first sitemap
|
|
15
|
+
|
|
16
|
+
Create `config/sitemap.rb`:
|
|
17
|
+
|
|
18
|
+
```ruby
|
|
19
|
+
SiteMaps.use(:file_system) do
|
|
20
|
+
configure do |config|
|
|
21
|
+
config.url = 'https://example.com/sitemap.xml'
|
|
22
|
+
config.directory = File.expand_path('../public', __dir__)
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
process do |s|
|
|
26
|
+
s.add('/', priority: 1.0, changefreq: 'daily')
|
|
27
|
+
s.add('/about', priority: 0.8, lastmod: Time.now)
|
|
28
|
+
s.add('/contact', priority: 0.5)
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Generate:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
bundle exec site_maps generate --config-file config/sitemap.rb
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Output: `public/sitemap.xml`.
|
|
40
|
+
|
|
41
|
+
## Dynamic URLs
|
|
42
|
+
|
|
43
|
+
Yield `s.add` for every URL you want indexed. Database records work naturally:
|
|
44
|
+
|
|
45
|
+
```ruby
|
|
46
|
+
process :posts do |s|
|
|
47
|
+
Post.published.find_each do |post|
|
|
48
|
+
s.add("/posts/#{post.slug}", lastmod: post.updated_at, priority: 0.7)
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
When the URL count of a single process exceeds `max_links` (default 50,000), the file is split into `sitemap1.xml`, `sitemap2.xml`, … and a sitemap index is written at `config.url`.
|
|
54
|
+
|
|
55
|
+
## Named processes
|
|
56
|
+
|
|
57
|
+
Named processes get their own file and run in parallel:
|
|
58
|
+
|
|
59
|
+
```ruby
|
|
60
|
+
SiteMaps.use(:file_system) do
|
|
61
|
+
configure { |c| c.url = 'https://example.com/sitemap.xml'; c.directory = 'public' }
|
|
62
|
+
|
|
63
|
+
process :static do |s|
|
|
64
|
+
s.add('/')
|
|
65
|
+
s.add('/about')
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
process :posts, 'posts/sitemap.xml' do |s|
|
|
69
|
+
Post.find_each { |p| s.add("/posts/#{p.slug}") }
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
process :products, 'products/sitemap.xml' do |s|
|
|
73
|
+
Product.find_each { |p| s.add("/products/#{p.id}") }
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Run all:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
bundle exec site_maps generate --config-file config/sitemap.rb --max-threads 4
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Run one:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
bundle exec site_maps generate posts --config-file config/sitemap.rb
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
See [processes.md](processes.md) for the full process DSL including parameterized templates.
|
|
91
|
+
|
|
92
|
+
## Using it in Rails
|
|
93
|
+
|
|
94
|
+
Add `site_maps` to your Gemfile and generate from a Rake task, a scheduled job, or your deploy pipeline. The Railtie injects URL helpers:
|
|
95
|
+
|
|
96
|
+
```ruby
|
|
97
|
+
# config/sitemap.rb
|
|
98
|
+
SiteMaps.use(:file_system) do
|
|
99
|
+
configure do |config|
|
|
100
|
+
config.url = 'https://example.com/sitemap.xml'
|
|
101
|
+
config.directory = Rails.public_path.to_s
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
process do |s|
|
|
105
|
+
s.add(s.route.root_path, priority: 1.0)
|
|
106
|
+
s.add(s.route.about_path)
|
|
107
|
+
Post.find_each { |post| s.add(s.route.post_path(post), lastmod: post.updated_at) }
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
See [rails.md](rails.md) for the full Rails integration, including asset precompile hooks and the Rack middleware for serving generated sitemaps.
|
|
113
|
+
|
|
114
|
+
## Uploading to S3
|
|
115
|
+
|
|
116
|
+
Swap the adapter line:
|
|
117
|
+
|
|
118
|
+
```ruby
|
|
119
|
+
SiteMaps.use(:aws_sdk) do
|
|
120
|
+
configure do |config|
|
|
121
|
+
config.url = 'https://my-bucket.s3.amazonaws.com/sitemap.xml'
|
|
122
|
+
config.bucket = 'my-bucket'
|
|
123
|
+
config.region = ENV['AWS_REGION']
|
|
124
|
+
# access_key_id / secret_access_key default to ENV vars
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
process { |s| ... }
|
|
128
|
+
end
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
See [adapters.md](adapters.md) for adapter specifics and how to build your own.
|
|
132
|
+
|
|
133
|
+
## Next steps
|
|
134
|
+
|
|
135
|
+
- [Processes](processes.md) — split your sitemap into static and dynamic shards
|
|
136
|
+
- [SEO extensions](extensions.md) — image, video, news, hreflang
|
|
137
|
+
- [CLI](cli.md) — automation-friendly generate command
|
|
138
|
+
- [Rack middleware](middleware.md) — serve the generated files with correct headers
|
data/docs/middleware.md
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# Rack Middleware
|
|
2
|
+
|
|
3
|
+
`SiteMaps::Middleware` serves generated sitemap files directly from the app. Useful when you've generated to `public/sitemaps/` (filesystem adapter) and want proper `Content-Type`, gzip handling, and XSL stylesheet routing without editing your web-server config.
|
|
4
|
+
|
|
5
|
+
## Basic usage
|
|
6
|
+
|
|
7
|
+
```ruby
|
|
8
|
+
# config/application.rb (Rails)
|
|
9
|
+
config.middleware.use SiteMaps::Middleware, adapter: -> { SiteMaps.current_adapter }
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
Or inline in `config.ru`:
|
|
13
|
+
|
|
14
|
+
```ruby
|
|
15
|
+
require 'site_maps'
|
|
16
|
+
|
|
17
|
+
use SiteMaps::Middleware, adapter: SiteMaps.current_adapter
|
|
18
|
+
run MyApp
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Options
|
|
22
|
+
|
|
23
|
+
```ruby
|
|
24
|
+
use SiteMaps::Middleware,
|
|
25
|
+
adapter: SiteMaps.current_adapter,
|
|
26
|
+
public_prefix: nil,
|
|
27
|
+
storage_prefix: nil,
|
|
28
|
+
x_robots_tag: 'noindex, follow',
|
|
29
|
+
cache_control: 'public, max-age=3600'
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
| Option | Purpose |
|
|
33
|
+
|--------|---------|
|
|
34
|
+
| `adapter` | Adapter instance (or a callable returning one — useful if the adapter is reconfigured at boot). |
|
|
35
|
+
| `public_prefix` | Strip from request path before lookup — e.g. `/sitemap` if your app mounts them under a sub-path. |
|
|
36
|
+
| `storage_prefix` | Prepend to the lookup key — e.g. `tenants/acme` for multi-tenant layouts. |
|
|
37
|
+
| `x_robots_tag` | `X-Robots-Tag` header added to served files. |
|
|
38
|
+
| `cache_control` | `Cache-Control` header. |
|
|
39
|
+
|
|
40
|
+
## Behavior
|
|
41
|
+
|
|
42
|
+
The middleware intercepts requests for `*.xml` and `*.xml.gz` files:
|
|
43
|
+
|
|
44
|
+
- Matches → serve from the adapter with `Content-Type: application/xml`, plus `X-Robots-Tag` and `Cache-Control`.
|
|
45
|
+
- Gzipped sources → auto-decompress on serve so XSL stylesheets render in the browser. Clients asking for `.xml.gz` still get the compressed bytes.
|
|
46
|
+
- Doesn't match → `env` passes through to `@app.call`.
|
|
47
|
+
|
|
48
|
+
## XSL stylesheets
|
|
49
|
+
|
|
50
|
+
The middleware also serves the built-in XSL stylesheets — pretty sitemap rendering for human visitors — at their referenced paths. Configure their URLs via:
|
|
51
|
+
|
|
52
|
+
```ruby
|
|
53
|
+
configure do |config|
|
|
54
|
+
config.xsl_stylesheet_url = '/_sitemap-stylesheet.xsl'
|
|
55
|
+
config.xsl_index_stylesheet_url = '/_sitemap-index-stylesheet.xsl'
|
|
56
|
+
end
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Multi-tenant routing
|
|
60
|
+
|
|
61
|
+
For per-tenant sitemaps stored under subpaths:
|
|
62
|
+
|
|
63
|
+
```ruby
|
|
64
|
+
use SiteMaps::Middleware,
|
|
65
|
+
adapter: per_request_adapter,
|
|
66
|
+
storage_prefix: ->(request) { "tenants/#{request.host.split('.').first}" }
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
If the adapter itself already scopes paths by tenant, no prefix is needed — just point it at the right one for each request.
|
|
70
|
+
|
|
71
|
+
## robots.txt integration
|
|
72
|
+
|
|
73
|
+
Emit a `Sitemap:` directive for the generated file:
|
|
74
|
+
|
|
75
|
+
```ruby
|
|
76
|
+
# config.ru or a controller
|
|
77
|
+
SiteMaps::RobotsTxt.sitemap_directive('https://example.com/sitemap.xml')
|
|
78
|
+
# => "Sitemap: https://example.com/sitemap.xml"
|
|
79
|
+
|
|
80
|
+
SiteMaps::RobotsTxt.render(
|
|
81
|
+
sitemap_url: 'https://example.com/sitemap.xml',
|
|
82
|
+
extra_directives: ['Disallow: /admin']
|
|
83
|
+
)
|
|
84
|
+
# => "Sitemap: https://example.com/sitemap.xml\nDisallow: /admin"
|
|
85
|
+
```
|
data/docs/processes.md
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# Processes
|
|
2
|
+
|
|
3
|
+
A **process** is a unit of work that produces part of a sitemap. Each process runs as its own task on the runner's thread pool, writes its own URL set, and becomes an entry in the sitemap index.
|
|
4
|
+
|
|
5
|
+
## Static processes
|
|
6
|
+
|
|
7
|
+
A static process has no parameters. It runs once and writes one (possibly split) sitemap file.
|
|
8
|
+
|
|
9
|
+
```ruby
|
|
10
|
+
SiteMaps.use(:file_system) do
|
|
11
|
+
configure { |c| c.url = 'https://example.com/sitemap.xml'; c.directory = 'public' }
|
|
12
|
+
|
|
13
|
+
process do |s|
|
|
14
|
+
s.add('/', priority: 1.0)
|
|
15
|
+
s.add('/about')
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
process :posts, 'posts/sitemap.xml' do |s|
|
|
19
|
+
Post.find_each { |post| s.add("/posts/#{post.slug}", lastmod: post.updated_at) }
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
- Without an explicit name, the process is named `:default`.
|
|
25
|
+
- Without an explicit location, a default filename is assigned.
|
|
26
|
+
- The block receives a `SitemapBuilder` (`s`), on which `add` is called per URL.
|
|
27
|
+
|
|
28
|
+
## Dynamic processes
|
|
29
|
+
|
|
30
|
+
A dynamic process has placeholders in its location template and corresponding kwargs. Each unique combination of kwargs produces a separate sitemap file.
|
|
31
|
+
|
|
32
|
+
```ruby
|
|
33
|
+
process :monthly_posts, 'posts/%{year}-%{month}/sitemap.xml', year: 2024, month: 1 do |s, year:, month:, **|
|
|
34
|
+
Post.where('extract(year from published_at) = ? AND extract(month from published_at) = ?', year, month)
|
|
35
|
+
.find_each { |p| s.add("/posts/#{p.slug}", lastmod: p.updated_at) }
|
|
36
|
+
end
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
The kwargs passed to `process` are **defaults**; the real values come from `Runner#enqueue`:
|
|
40
|
+
|
|
41
|
+
```ruby
|
|
42
|
+
runner = SiteMaps.generate(config_file: 'config/sitemap.rb')
|
|
43
|
+
runner.enqueue(:monthly_posts, year: 2024, month: 1)
|
|
44
|
+
runner.enqueue(:monthly_posts, year: 2024, month: 2)
|
|
45
|
+
runner.enqueue(:monthly_posts, year: 2024, month: 3)
|
|
46
|
+
runner.run
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Or from the CLI:
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
bundle exec site_maps generate monthly_posts \
|
|
53
|
+
--config-file config/sitemap.rb \
|
|
54
|
+
--context=year:2024 month:1
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Execution model
|
|
58
|
+
|
|
59
|
+
When you call `runner.run`:
|
|
60
|
+
|
|
61
|
+
1. Each enqueued process is wrapped in a `Concurrent::Future`.
|
|
62
|
+
2. The pool (default 4 threads, configurable via `--max-threads`) runs them in parallel.
|
|
63
|
+
3. Each process builds a `URLSet`. When the set fills up (50,000 links, 1,000 news items, or 50 MB uncompressed), it's finalized and written, and a new URLSet starts — automatically.
|
|
64
|
+
4. After every process finishes, the sitemap index is aggregated and written to `config.url`.
|
|
65
|
+
|
|
66
|
+
## Splitting rules
|
|
67
|
+
|
|
68
|
+
A URL set is finalized and rolled over when **any** of these apply:
|
|
69
|
+
|
|
70
|
+
- Links reach `config.max_links` (default 50,000 — the sitemap spec limit).
|
|
71
|
+
- News entries reach 1,000.
|
|
72
|
+
- Uncompressed XML reaches 50 MB.
|
|
73
|
+
|
|
74
|
+
Split files are named by `IncrementalLocation`: `posts/sitemap.xml` becomes `posts/sitemap1.xml`, `posts/sitemap2.xml`, etc.
|
|
75
|
+
|
|
76
|
+
## Index generation
|
|
77
|
+
|
|
78
|
+
A sitemap index is produced when:
|
|
79
|
+
|
|
80
|
+
- More than one process exists,
|
|
81
|
+
- A single process was split across multiple files, or
|
|
82
|
+
- External sitemaps were added.
|
|
83
|
+
|
|
84
|
+
Otherwise a single `urlset` is written directly at `config.url` (the "inline" optimization).
|
|
85
|
+
|
|
86
|
+
## Adding external sitemaps
|
|
87
|
+
|
|
88
|
+
Reference third-party or pre-existing sitemaps in the index:
|
|
89
|
+
|
|
90
|
+
```ruby
|
|
91
|
+
SiteMaps.use(:file_system) do
|
|
92
|
+
configure { |c| c.url = 'https://example.com/sitemap.xml'; c.directory = 'public' }
|
|
93
|
+
|
|
94
|
+
external_sitemap('https://cdn.example.com/legacy-sitemap.xml', lastmod: Time.parse('2024-01-15'))
|
|
95
|
+
|
|
96
|
+
process { |s| s.add('/') }
|
|
97
|
+
end
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Shared helpers across processes
|
|
101
|
+
|
|
102
|
+
Use `extend_processes_with` to add methods that every process block can call:
|
|
103
|
+
|
|
104
|
+
```ruby
|
|
105
|
+
module Helpers
|
|
106
|
+
def post_path(post) = "/posts/#{post.slug}"
|
|
107
|
+
def published_posts = Post.where.not(published_at: nil)
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
SiteMaps.use(:file_system) do
|
|
111
|
+
configure { |c| c.url = 'https://example.com/sitemap.xml'; c.directory = 'public' }
|
|
112
|
+
extend_processes_with(Helpers)
|
|
113
|
+
|
|
114
|
+
process :posts do |s|
|
|
115
|
+
published_posts.find_each { |p| s.add(post_path(p), lastmod: p.updated_at) }
|
|
116
|
+
end
|
|
117
|
+
end
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## URL filters
|
|
121
|
+
|
|
122
|
+
Filters run per URL inside every process — use them for global exclusions or default attributes:
|
|
123
|
+
|
|
124
|
+
```ruby
|
|
125
|
+
SiteMaps.use(:file_system) do
|
|
126
|
+
configure { |c| c.url = 'https://example.com/sitemap.xml'; c.directory = 'public' }
|
|
127
|
+
|
|
128
|
+
# Exclude any /admin path
|
|
129
|
+
url_filter { |url, _options| false if url.include?('/admin') }
|
|
130
|
+
|
|
131
|
+
# Boost blog priority
|
|
132
|
+
url_filter do |url, options|
|
|
133
|
+
if url.include?('/blog/')
|
|
134
|
+
options.merge(priority: 0.9, changefreq: 'daily')
|
|
135
|
+
else
|
|
136
|
+
options
|
|
137
|
+
end
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
process { |s| ... }
|
|
141
|
+
end
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
A filter returning `false` excludes the URL entirely. Returning a hash replaces the options. Any other return value (including `nil`) keeps the URL and leaves its current options unchanged.
|
|
145
|
+
|
|
146
|
+
## Re-running a single shard
|
|
147
|
+
|
|
148
|
+
Only regenerate what changed — the rest is preserved from the existing sitemap index:
|
|
149
|
+
|
|
150
|
+
```ruby
|
|
151
|
+
runner = SiteMaps.generate(config_file: 'config/sitemap.rb')
|
|
152
|
+
runner.enqueue(:monthly_posts, year: 2024, month: 3) # only March
|
|
153
|
+
runner.run # Jan and Feb kept as-is
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
This is the main advantage of parameterized dynamic processes: you can rebuild one month's shard on a cron and leave the rest untouched.
|
data/docs/rails.md
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# Rails Integration
|
|
2
|
+
|
|
3
|
+
The Railtie loads automatically when Rails is present. It does one thing and deliberately nothing more:
|
|
4
|
+
|
|
5
|
+
1. **URL helpers** — `s.route.<helper>` inside process blocks.
|
|
6
|
+
2. **No other magic** — no initializer, no autoloaded directories, no patched generators.
|
|
7
|
+
|
|
8
|
+
## URL helpers in processes
|
|
9
|
+
|
|
10
|
+
```ruby
|
|
11
|
+
# config/sitemap.rb
|
|
12
|
+
SiteMaps.use(:file_system) do
|
|
13
|
+
configure do |config|
|
|
14
|
+
config.url = 'https://example.com/sitemap.xml'
|
|
15
|
+
config.directory = Rails.public_path.to_s
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
process do |s|
|
|
19
|
+
s.add(s.route.root_path, priority: 1.0)
|
|
20
|
+
s.add(s.route.about_path)
|
|
21
|
+
Post.find_each { |p| s.add(s.route.post_path(p), lastmod: p.updated_at) }
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
`s.route` is a singleton wrapping `Rails.application.routes.url_helpers`.
|
|
27
|
+
|
|
28
|
+
## Generating from Rails
|
|
29
|
+
|
|
30
|
+
### One-off
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
bundle exec site_maps generate --config-file config/sitemap.rb
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
The CLI auto-requires `config/environment.rb` if it finds a `config/application.rb`, so ActiveRecord, URL helpers, and everything else loads as normal.
|
|
37
|
+
|
|
38
|
+
### From a Rake task
|
|
39
|
+
|
|
40
|
+
```ruby
|
|
41
|
+
# lib/tasks/sitemap.rake
|
|
42
|
+
namespace :sitemap do
|
|
43
|
+
desc 'Generate sitemaps'
|
|
44
|
+
task generate: :environment do
|
|
45
|
+
runner = SiteMaps.generate(config_file: Rails.root.join('config/sitemap.rb').to_s)
|
|
46
|
+
runner.enqueue_all.run
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Run on deploy or via cron:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
bundle exec rake sitemap:generate
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### From a scheduled job
|
|
58
|
+
|
|
59
|
+
```ruby
|
|
60
|
+
class SitemapJob < ApplicationJob
|
|
61
|
+
def perform
|
|
62
|
+
runner = SiteMaps.generate(config_file: Rails.root.join('config/sitemap.rb').to_s)
|
|
63
|
+
runner.enqueue_all.run
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
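SitemapJob.perform_later # run it daily (e.g. cron '0 3 * * *') via your queue backend's recurring-job feature; Active Job's `set` has no `cron:` option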
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Serving generated sitemaps
|
|
71
|
+
|
|
72
|
+
Add the Rack middleware to serve files generated by the `:file_system` adapter:
|
|
73
|
+
|
|
74
|
+
```ruby
|
|
75
|
+
# config/application.rb
|
|
76
|
+
config.middleware.use SiteMaps::Middleware, adapter: -> { SiteMaps.current_adapter }
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
See [middleware.md](middleware.md) for options.
|
|
80
|
+
|
|
81
|
+
## Asset precompile integration
|
|
82
|
+
|
|
83
|
+
If you want sitemaps regenerated on every deploy, hook into `assets:precompile`:
|
|
84
|
+
|
|
85
|
+
```ruby
|
|
86
|
+
# lib/tasks/sitemap.rake
|
|
87
|
+
Rake::Task['assets:precompile'].enhance(['sitemap:generate'])
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## robots.txt
|
|
91
|
+
|
|
92
|
+
```erb
|
|
93
|
+
<%# public/robots.txt.erb or app/views/robots.text.erb %>
|
|
94
|
+
User-agent: *
|
|
95
|
+
Disallow: /admin
|
|
96
|
+
|
|
97
|
+
<%= SiteMaps::RobotsTxt.sitemap_directive('https://example.com/sitemap.xml') %>
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Multi-tenant
|
|
101
|
+
|
|
102
|
+
`SiteMaps.define` gives you a generation function parameterized by runtime context:
|
|
103
|
+
|
|
104
|
+
```ruby
|
|
105
|
+
# config/sitemap.rb
|
|
106
|
+
SiteMaps.define do |tenant:|
|
|
107
|
+
use(:file_system) do
|
|
108
|
+
configure do |config|
|
|
109
|
+
config.url = "https://#{tenant.domain}/sitemap.xml"
|
|
110
|
+
config.directory = tenant.public_path
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
process { |s| tenant.pages.each { |page| s.add(page.path, lastmod: page.updated_at) } }
|
|
114
|
+
end
|
|
115
|
+
end
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
```ruby
|
|
119
|
+
Tenant.find_each do |tenant|
|
|
120
|
+
SiteMaps.generate(config_file: 'config/sitemap.rb', context: { tenant: tenant }).enqueue_all.run
|
|
121
|
+
end
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
The context hash is splatted into the `define` block as keyword args.
|
|
125
|
+
|
|
126
|
+
## Dependencies
|
|
127
|
+
|
|
128
|
+
- Rails is **not** listed in the gemspec. The Railtie is loaded only if Rails is already present. If you're using `site_maps` in a non-Rails Ruby project, the Rails-specific pieces are inert.
|
|
@@ -13,11 +13,12 @@ module SiteMaps::Adapters
|
|
|
13
13
|
end
|
|
14
14
|
|
|
15
15
|
def_delegators :config, :fetch_sitemap_index_links
|
|
16
|
-
attr_reader :sitemap_index, :processes
|
|
16
|
+
attr_reader :sitemap_index, :processes, :process_mixins
|
|
17
17
|
|
|
18
18
|
def initialize(**options, &block)
|
|
19
19
|
@config = SiteMaps.config.becomes(self.class.config_class, **options)
|
|
20
20
|
@processes = Concurrent::Hash.new
|
|
21
|
+
@process_mixins = Concurrent::Array.new
|
|
21
22
|
reset!
|
|
22
23
|
instance_exec(&block) if block
|
|
23
24
|
end
|
|
@@ -60,20 +61,49 @@ module SiteMaps::Adapters
|
|
|
60
61
|
@processes[name] = SiteMaps::Process.new(name, location, kwargs, block)
|
|
61
62
|
end
|
|
62
63
|
|
|
64
|
+
def external_sitemap(url, lastmod: nil)
|
|
65
|
+
@external_sitemaps ||= Concurrent::Array.new
|
|
66
|
+
@external_sitemaps << SiteMaps::Builder::SitemapIndex::Item.new(url, lastmod)
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def external_sitemaps
|
|
70
|
+
@external_sitemaps || []
|
|
71
|
+
end
|
|
72
|
+
|
|
63
73
|
def maybe_inline_urlset?
|
|
64
|
-
@processes.size == 1 && @processes.first.last.static?
|
|
74
|
+
@processes.size == 1 && @processes.first.last.static? && external_sitemaps.empty?
|
|
65
75
|
end
|
|
66
76
|
|
|
67
77
|
def repo
|
|
68
78
|
@repo ||= SiteMaps::AtomicRepository.new(config.url)
|
|
69
79
|
end
|
|
70
80
|
|
|
71
|
-
def
|
|
72
|
-
|
|
81
|
+
def extend_processes_with(mod)
|
|
82
|
+
@process_mixins << mod
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def url_filter(&block)
|
|
86
|
+
if block
|
|
87
|
+
@url_filters ||= Concurrent::Array.new
|
|
88
|
+
@url_filters << block
|
|
89
|
+
end
|
|
90
|
+
@url_filters || []
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def apply_url_filters(link, options)
|
|
94
|
+
url = link.respond_to?(:to_s) ? link.to_s : link
|
|
95
|
+
url_filter.each do |filter|
|
|
96
|
+
result = filter.call(url, options)
|
|
97
|
+
return nil if result == false
|
|
98
|
+
|
|
99
|
+
options = result if result.is_a?(Hash)
|
|
100
|
+
end
|
|
101
|
+
options
|
|
73
102
|
end
|
|
74
103
|
|
|
75
104
|
def reset!
|
|
76
|
-
|
|
105
|
+
xsl_url = config.respond_to?(:xsl_index_stylesheet_url) ? config.xsl_index_stylesheet_url : nil
|
|
106
|
+
@sitemap_index = SiteMaps::Builder::SitemapIndex.new(xsl_url: xsl_url)
|
|
77
107
|
@repo = nil
|
|
78
108
|
end
|
|
79
109
|
end
|
|
@@ -14,8 +14,7 @@ class SiteMaps::Adapters::AwsSdk::Storage
|
|
|
14
14
|
lastmod = options.delete(:last_modified) || Time.now
|
|
15
15
|
options[:metadata] ||= {}
|
|
16
16
|
options[:metadata]["given-last-modified"] = lastmod.utc.strftime("%Y-%m-%dT%H:%M:%S%:z")
|
|
17
|
-
|
|
18
|
-
obj.upload_file(location.path, **options)
|
|
17
|
+
transfer_manager.upload_file(location.path, bucket: config.bucket, key: location.remote_path, **options)
|
|
19
18
|
end
|
|
20
19
|
|
|
21
20
|
def read(location)
|
|
@@ -49,4 +48,8 @@ class SiteMaps::Adapters::AwsSdk::Storage
|
|
|
49
48
|
def object(remote_path)
|
|
50
49
|
config.s3_bucket.object(remote_path)
|
|
51
50
|
end
|
|
51
|
+
|
|
52
|
+
def transfer_manager
|
|
53
|
+
@transfer_manager ||= ::Aws::S3::TransferManager.new(client: config.s3_resource.client)
|
|
54
|
+
end
|
|
52
55
|
end
|
|
@@ -25,7 +25,7 @@ class SiteMaps::Builder::SitemapIndex::Item < Struct.new(:loc, :lastmod)
|
|
|
25
25
|
return unless loc =~ %r{^https?://[^/]+(/.*)$}
|
|
26
26
|
|
|
27
27
|
val = File.dirname(Regexp.last_match(1))
|
|
28
|
-
val = val[1
|
|
28
|
+
val = val[1..] if val.start_with?("/")
|
|
29
29
|
val
|
|
30
30
|
end
|
|
31
31
|
|
|
@@ -2,20 +2,22 @@
|
|
|
2
2
|
|
|
3
3
|
module SiteMaps::Builder
|
|
4
4
|
class SitemapIndex
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
XML_DECLARATION = %(<?xml version="1.0" encoding="UTF-8"?>)
|
|
6
|
+
SITEMAPINDEX_OPEN = <<~SITEMAPINDEX_OPEN
|
|
7
7
|
<sitemapindex
|
|
8
8
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
|
9
9
|
xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd"
|
|
10
10
|
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
|
11
11
|
>
|
|
12
|
-
|
|
12
|
+
SITEMAPINDEX_OPEN
|
|
13
|
+
HEADER = "#{XML_DECLARATION}\n#{SITEMAPINDEX_OPEN}"
|
|
13
14
|
FOOTER = "</sitemapindex>"
|
|
14
15
|
|
|
15
16
|
attr_reader :sitemaps
|
|
16
17
|
|
|
17
|
-
def initialize
|
|
18
|
+
def initialize(xsl_url: nil)
|
|
18
19
|
@sitemaps = Concurrent::Set.new
|
|
20
|
+
@xsl_url = xsl_url
|
|
19
21
|
end
|
|
20
22
|
|
|
21
23
|
def add(loc, lastmod: nil)
|
|
@@ -25,7 +27,13 @@ module SiteMaps::Builder
|
|
|
25
27
|
|
|
26
28
|
def to_xml
|
|
27
29
|
io = StringIO.new
|
|
28
|
-
|
|
30
|
+
if @xsl_url
|
|
31
|
+
io.puts(XML_DECLARATION)
|
|
32
|
+
io.puts(XSLStylesheet.processing_instruction(@xsl_url))
|
|
33
|
+
io.puts(SITEMAPINDEX_OPEN)
|
|
34
|
+
else
|
|
35
|
+
io.puts(HEADER)
|
|
36
|
+
end
|
|
29
37
|
@sitemaps.each do |sitemap|
|
|
30
38
|
io.puts(sitemap.to_xml)
|
|
31
39
|
end
|
|
@@ -33,8 +41,24 @@ module SiteMaps::Builder
|
|
|
33
41
|
io.string
|
|
34
42
|
end
|
|
35
43
|
|
|
44
|
+
def last_modified
|
|
45
|
+
dates = @sitemaps.filter_map { |s| parse_lastmod(s.lastmod) }
|
|
46
|
+
dates.max || Time.now
|
|
47
|
+
end
|
|
48
|
+
|
|
36
49
|
def empty?
|
|
37
50
|
@sitemaps.empty?
|
|
38
51
|
end
|
|
52
|
+
|
|
53
|
+
private
|
|
54
|
+
|
|
55
|
+
def parse_lastmod(value)
|
|
56
|
+
case value
|
|
57
|
+
when Time then value
|
|
58
|
+
when String then Time.parse(value)
|
|
59
|
+
end
|
|
60
|
+
rescue ArgumentError
|
|
61
|
+
nil
|
|
62
|
+
end
|
|
39
63
|
end
|
|
40
64
|
end
|