site_maps 0.0.1.beta1 → 0.0.1.beta3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +495 -34
- data/lib/site_maps/notification.rb +4 -4
- data/lib/site_maps/process.rb +6 -0
- data/lib/site_maps/runner/event_listener.rb +16 -11
- data/lib/site_maps/runner.rb +5 -4
- data/lib/site_maps/sitemap_builder.rb +5 -4
- data/lib/site_maps/sitemap_reader.rb +5 -5
- data/lib/site_maps/version.rb +1 -1
- data/site_maps.gemspec +2 -2
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d0b92468df93a09f176223cd329a97253051d3beece1ebd3c33efe2a8a23b109
|
4
|
+
data.tar.gz: 6d47acbaa8f176cd931929100a26ea1c3184005c6a127dc69bea41fa5ff6194f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 58bf88572f4e54bbdf784d33146908198d6fccb4558573946f84ee566a1b2fcb600eada66d81e0c2bc6b4529cee9eb7e631559f1ee863756d8ed561f13beb818
|
7
|
+
data.tar.gz: 4c432015c4a7e2463a22f47742ad779efe2fbcfa7577c408ad50681855bc2b6d8697cde7bc04acb9f8c2113d6f8d41c0571e3c40de53513cf6aa3275ba60a635
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
site_maps (0.0.1.
|
4
|
+
site_maps (0.0.1.beta3)
|
5
5
|
builder (~> 3.0)
|
6
6
|
concurrent-ruby (>= 1.1)
|
7
7
|
rack (>= 2.0)
|
@@ -111,7 +111,7 @@ GEM
|
|
111
111
|
addressable (>= 2.8.0)
|
112
112
|
crack (>= 0.3.2)
|
113
113
|
hashdiff (>= 0.4.0, < 2.0.0)
|
114
|
-
zeitwerk (2.
|
114
|
+
zeitwerk (2.7.1)
|
115
115
|
|
116
116
|
PLATFORMS
|
117
117
|
x86_64-linux
|
data/README.md
CHANGED
@@ -42,15 +42,60 @@ end
|
|
42
42
|
|
43
43
|
After creating the configuration file, you can run the following command to generate the sitemap:
|
44
44
|
|
45
|
-
```
|
45
|
+
```ruby
|
46
46
|
SiteMaps.generate(config_file: "config/sitemap.rb")
|
47
47
|
.enqueue_all
|
48
48
|
.run
|
49
49
|
```
|
50
50
|
|
51
|
-
|
51
|
+
or you can use the CLI to generate the sitemap:
|
52
|
+
|
53
|
+
```bash
|
54
|
+
bundle exec site_maps generate --config-file config/sitemap.rb
|
55
|
+
```
|
56
|
+
|
57
|
+
### Configuration
|
58
|
+
|
59
|
+
Configuration can be defined using the `configure` block or by passing the configuration options to the `use` method. Each adapter may have specific configuration options, but the following options are common to all adapters:
|
60
|
+
|
61
|
+
* `url` - URL of the main sitemap index file. This URL must end with `.xml` or `.xml.gz`.
|
62
|
+
* `directory` - Directory where the sitemap files will be stored.
|
63
|
+
|
64
|
+
Configuration using the `#configure` block
|
65
|
+
|
66
|
+
```ruby
|
67
|
+
SiteMaps.use(:file_system) do
|
68
|
+
configure do |config|
|
69
|
+
config.url = "https://example.com/sitemaps/sitemap.xml.gz"
|
70
|
+
config.directory = "/home/www/public"
|
71
|
+
end
|
72
|
+
# define sitemap processes..
|
73
|
+
end
|
74
|
+
```
|
75
|
+
|
76
|
+
Configuration using `#config` method
|
52
77
|
|
53
|
-
|
78
|
+
```ruby
|
79
|
+
SiteMaps.use(:file_system) do
|
80
|
+
config.url = "https://example.com/sitemaps/sitemap.xml.gz"
|
81
|
+
config.directory = "/home/www/public"
|
82
|
+
# define sitemap processes..
|
83
|
+
end
|
84
|
+
```
|
85
|
+
|
86
|
+
Configuration passing options to the `#use` method
|
87
|
+
|
88
|
+
```ruby
|
89
|
+
SiteMaps.use(:file_system, url: "https://example.com/sitemaps/sitemap.xml.gz", directory: "/home/www/public") do
|
90
|
+
# define sitemap processes..
|
91
|
+
end
|
92
|
+
```
|
93
|
+
|
94
|
+
Refer to the adapter documentation to see the specific configuration options.
|
95
|
+
|
96
|
+
### Gzip Compression
|
97
|
+
|
98
|
+
The sitemap files can be automatically compressed using the gzip algorithm. To enable the gzip compression, just pass the sitemap url with the `.gz` extension.
|
54
99
|
|
55
100
|
```ruby
|
56
101
|
# config/sitemap.rb
|
@@ -59,6 +104,39 @@ SiteMaps.use(:file_system) do
|
|
59
104
|
config.url = "https://example.com/sitemaps/sitemap.xml.gz" # Location of main sitemap index file
|
60
105
|
config.directory = "/home/www/public"
|
61
106
|
end
|
107
|
+
process do |s|
|
108
|
+
# Add sitemap links
|
109
|
+
end
|
110
|
+
end
|
111
|
+
```
|
112
|
+
|
113
|
+
### Sitemap Index
|
114
|
+
|
115
|
+
For small websites, you can use a single sitemap file to store all the links. However, for large websites with thousands of links, you should use a sitemap index file to store the sitemap links. This library will automatically generate the sitemap index file if you define multiple processes or if the amount of links exceeds the maximum limit of links or file size.
|
116
|
+
|
117
|
+
|
118
|
+
Criteria to generate the sitemap index file:
|
119
|
+
* Multiple processes defined in the configuration file.
|
120
|
+
* The amount of links exceeds the maximum limit of links (50,000 links).
|
121
|
+
* The amount of news links exceeds the maximum limit of news links (1,000 links).
|
122
|
+
* The uncompressed file size exceeds the maximum limit of file size (50MB).
|
123
|
+
|
124
|
+
### Static and Dynamic Processes
|
125
|
+
|
126
|
+
Sitemap links are defined in the `process` block because the gem is designed to generate sitemaps for large websites in parallel. It means that each process will be executed in a separate thread, which will improve the performance of the sitemap generation.
|
127
|
+
|
128
|
+
Each process can have a unique name and a unique sitemap file location. By omitting the name and the file location, the process will use the `:default` value.
|
129
|
+
|
130
|
+
Below is an example of a configuration file with multiple processes:
|
131
|
+
|
132
|
+
```ruby
|
133
|
+
# config/sitemap.rb
|
134
|
+
SiteMaps.use(:file_system) do
|
135
|
+
configure do |config|
|
136
|
+
config.url = "https://example.com/sitemaps/sitemap.xml" # Location of main sitemap index file
|
137
|
+
config.directory = "/home/www/public"
|
138
|
+
end
|
139
|
+
# Static Processes
|
62
140
|
process do |s|
|
63
141
|
s.add('/', priority: 1.0, changefreq: "daily")
|
64
142
|
s.add('/about', priority: 0.9, changefreq: "weekly")
|
@@ -68,38 +146,281 @@ SiteMaps.use(:file_system) do
|
|
68
146
|
s.add(category_path(category), priority: 0.7)
|
69
147
|
end
|
70
148
|
end
|
71
|
-
|
72
|
-
|
149
|
+
# Dynamic Processes
|
150
|
+
process :posts, "posts/%{year}-%{month}/sitemap.xml", year: Date.today.year, month: Date.today.month do |s, year:, month:|
|
151
|
+
Post.where(year: year.to_i, month: month.to_i).find_each do |post|
|
73
152
|
s.add(post_path(post), priority: 0.8)
|
74
153
|
end
|
75
154
|
end
|
76
155
|
end
|
77
156
|
```
|
78
157
|
|
79
|
-
|
158
|
+
Dynamic processes are defined by passing a process name, a location, and a list of extra arguments that will be dynamically replaced by the values given to the `enqueue` method.
|
159
|
+
|
160
|
+
Location can contain placeholders that will be replaced by the values passed to the process block (the `%{year}` and `%{month}` in the example above). Both relative and absolute paths are supported. Note that when using relative paths, the base directory of the main sitemap index file will be used as the root directory.
|
161
|
+
|
162
|
+
It will let you enqueue the same process multiple times with different values.
|
80
163
|
|
81
164
|
```ruby
|
82
165
|
SiteMaps.generate(config_file: "config/sitemap.rb")
|
83
|
-
.enqueue(:posts, year: 2021, month:
|
84
|
-
.enqueue(:posts, year: 2021, month:
|
166
|
+
.enqueue(:posts, year: "2021", month: "01")
|
167
|
+
.enqueue(:posts, year: "2021", month: "02")
|
85
168
|
.enqueue_remaining # Enqueue all remaining processes (default and categories)
|
86
169
|
.run
|
87
170
|
```
|
88
171
|
|
89
|
-
|
172
|
+
**Important Considerations:**
|
173
|
+
|
174
|
+
* The values of the extra arguments may be strings when they are coming from the CLI or other sources.
|
175
|
+
* By omitting the extra arguments, the process will be enqueued with the default values defined in the configuration file. So make sure you define default values or properly add nil checks in the process block to avoid errors.
|
176
|
+
|
177
|
+
### Sitemap Extensions
|
178
|
+
|
179
|
+
The sitemap builder supports the following sitemap extensions:
|
180
|
+
|
181
|
+
* [Alternate](http://support.google.com/webmasters/bin/answer.py?hl=en&answer=2620865)
|
182
|
+
* [Image](https://support.google.com/webmasters/answer/178636?hl=en)
|
183
|
+
* [Mobile](http://support.google.com/webmasters/bin/answer.py?hl=en&answer=34648)
|
184
|
+
* [News](https://support.google.com/news/publisher-center/answer/9606710?hl=en)
|
185
|
+
* [PageMap](https://developers.google.com/custom-search/docs/structured_data?csw=1#pagemaps)
|
186
|
+
* [Video](https://support.google.com/webmasters/answer/80471?hl=en)
|
187
|
+
|
188
|
+
You can add the sitemap links with the extensions by passing a hash with the extension name as the key and the extension attributes as the value.
|
189
|
+
|
190
|
+
#### Image
|
191
|
+
|
192
|
+
Images can be added to the sitemap links by passing `images` attributes to the `add` method. The `images` attribute should be an array of hashes with the image attributes.
|
193
|
+
|
194
|
+
Check out the Google specification [here](https://support.google.com/webmasters/answer/178636?hl=en).
|
90
195
|
|
91
196
|
```ruby
|
92
|
-
|
93
|
-
SiteMaps.use(:file_system) do
|
94
|
-
|
197
|
+
config = { ... }
|
198
|
+
SiteMaps.use(:file_system, **config) do
|
199
|
+
process do |s|
|
200
|
+
s.add(
|
201
|
+
'/',
|
202
|
+
priority: 1.0,
|
203
|
+
changefreq: "daily",
|
204
|
+
images: [
|
205
|
+
{ loc: "https://example.com/image.jpg" }
|
206
|
+
],
|
207
|
+
)
|
208
|
+
end
|
209
|
+
end
|
210
|
+
```
|
211
|
+
|
212
|
+
Supported attributes:
|
213
|
+
* `loc` - URL of the image.
|
214
|
+
* `caption` - Image caption.
|
215
|
+
* `geo_location` - Image geo location.
|
216
|
+
* `title` - Image title.
|
217
|
+
* `license` - Image license.
|
218
|
+
|
219
|
+
#### Video
|
220
|
+
|
221
|
+
Videos can be added to the sitemap links by passing `videos` attributes to the `add` method. The `videos` attribute should be an array of hashes with the video attributes.
|
222
|
+
|
223
|
+
Check out the Google specification [here](https://support.google.com/webmasters/answer/80471?hl=en).
|
224
|
+
|
225
|
+
```ruby
|
226
|
+
config = { ... }
|
227
|
+
SiteMaps.use(:file_system, **config) do
|
228
|
+
process do |s|
|
229
|
+
s.add(
|
230
|
+
'/',
|
231
|
+
priority: 1.0,
|
232
|
+
changefreq: "daily",
|
233
|
+
videos: [
|
234
|
+
{
|
235
|
+
thumbnail_loc: "https://example.com/thumbnail.jpg",
|
236
|
+
title: "Video Title",
|
237
|
+
description: "Video Description",
|
238
|
+
content_loc: "https://example.com/video.mp4",
|
239
|
+
player_loc: "https://example.com/player.swf",
|
240
|
+
allow_embed: "yes",
|
241
|
+
autoplay: "ap=1",
|
242
|
+
# ...
|
243
|
+
}
|
244
|
+
],
|
245
|
+
)
|
246
|
+
end
|
247
|
+
end
|
248
|
+
```
|
249
|
+
Supported attributes:
|
250
|
+
* `thumbnail_loc` - URL of the thumbnail image.
|
251
|
+
* `title` - Title of the video.
|
252
|
+
* `description` - Description of the video.
|
253
|
+
* `content_loc` - URL of the video content.
|
254
|
+
* `player_loc` - URL of the video player.
|
255
|
+
* `allow_embed` - Allow embed attribute of the player location.
|
256
|
+
* `autoplay` - Autoplay attribute of the player location.
|
257
|
+
* `duration` - Duration of the video in seconds.
|
258
|
+
* `expiration_date` - Expiration date of the video.
|
259
|
+
* `rating` - Rating of the video.
|
260
|
+
* `view_count` - View count of the video.
|
261
|
+
* `publication_date` - Publication date of the video.
|
262
|
+
* `tags` - Tags of the video.
|
263
|
+
* `tag` - Single tag of the video.
|
264
|
+
* `category` - Category of the video.
|
265
|
+
* `family_friendly` - Family friendly attribute of the video.
|
266
|
+
* `gallery_loc` - URL of the video gallery.
|
267
|
+
* `gallery_title` - Title of the video gallery.
|
268
|
+
* `uploader` - Uploader of the video.
|
269
|
+
* `uploader_info` - Uploader info of the video.
|
270
|
+
* `price` - Price of the video.
|
271
|
+
* `price_currency` - Currency of the video price.
|
272
|
+
* `price_type` - Type of the video price.
|
273
|
+
* `price_resolution` - Resolution of the video price.
|
274
|
+
* `live` - Live attribute of the video.
|
275
|
+
* `requires_subscription` - Requires subscription attribute of the video.
|
276
|
+
|
277
|
+
#### PageMap
|
278
|
+
|
279
|
+
PageMap sitemaps can be added to the sitemap links by passing `pagemap` attributes to the `add` method. The `pagemap` attribute should be a hash with the pagemap attributes.
|
280
|
+
|
281
|
+
Check out the Google specification [here](https://developers.google.com/custom-search/docs/structured_data?csw=1#pagemaps).
|
282
|
+
|
283
|
+
```ruby
|
284
|
+
config = { ... }
|
285
|
+
SiteMaps.use(:file_system, **config) do
|
286
|
+
process do |s|
|
287
|
+
s.add(
|
288
|
+
'/',
|
289
|
+
priority: 1.0,
|
290
|
+
changefreq: "daily",
|
291
|
+
pagemap: {
|
292
|
+
dataobjects: [
|
293
|
+
{
|
294
|
+
type: "document",
|
295
|
+
id: "1",
|
296
|
+
attributes: [
|
297
|
+
{ name: "title", value: "Page Title" },
|
298
|
+
{ name: "description", value: "Page Description" },
|
299
|
+
{ name: "url", value: "https://example.com" },
|
300
|
+
]
|
301
|
+
}
|
302
|
+
]
|
303
|
+
}
|
304
|
+
)
|
305
|
+
end
|
306
|
+
end
|
307
|
+
```
|
308
|
+
|
309
|
+
Supported attributes:
|
310
|
+
* `dataobjects` - Array of hashes with the data objects.
|
311
|
+
* `type` - Type of the object.
|
312
|
+
* `id` - ID of the object.
|
313
|
+
* `attributes` - Array of hashes with the attributes.
|
314
|
+
* `name` - Name of the attribute.
|
315
|
+
* `value` - Value of the attribute.
|
316
|
+
|
317
|
+
#### News
|
318
|
+
|
319
|
+
News sitemaps can be added to the sitemap links by passing `news` attributes to the `add` method. The `news` attribute should be a hash with the news attributes.
|
320
|
+
|
321
|
+
Check out the Google specification [here](https://support.google.com/news/publisher-center/answer/9606710?hl=en).
|
322
|
+
|
323
|
+
```ruby
|
324
|
+
config = { ... }
|
325
|
+
SiteMaps.use(:file_system, **config) do
|
326
|
+
process do |s|
|
327
|
+
s.add(
|
328
|
+
'/',
|
329
|
+
priority: 1.0,
|
330
|
+
changefreq: "daily",
|
331
|
+
news: {
|
332
|
+
publication_name: "Publication Name",
|
333
|
+
publication_language: "en",
|
334
|
+
publication_date: Time.now,
|
335
|
+
genres: "PressRelease",
|
336
|
+
access: "Subscription",
|
337
|
+
title: "News Title",
|
338
|
+
keywords: "News Keywords",
|
339
|
+
stock_tickers: "NASDAQ:GOOG",
|
340
|
+
}
|
341
|
+
)
|
342
|
+
end
|
343
|
+
end
|
344
|
+
```
|
345
|
+
|
346
|
+
Supported attributes:
|
347
|
+
* `publication_name` - Name of the publication.
|
348
|
+
* `publication_language` - Language of the publication.
|
349
|
+
* `publication_date` - Publication date of the news.
|
350
|
+
* `genres` - Genres of the news.
|
351
|
+
* `access` - Access of the news.
|
352
|
+
* `title` - Title of the news.
|
353
|
+
* `keywords` - Keywords of the news.
|
354
|
+
* `stock_tickers` - Stock tickers of the news.
|
355
|
+
|
356
|
+
#### Alternates
|
357
|
+
|
358
|
+
You can add alternate links to the sitemap links by passing `alternates` attributes to the `add` method. The `alternates` attribute should be an array of hashes with the alternate attributes.
|
359
|
+
|
360
|
+
Check out the Google specification [here](http://support.google.com/webmasters/bin/answer.py?hl=en&answer=2620865).
|
361
|
+
|
362
|
+
```ruby
|
363
|
+
config = { ... }
|
364
|
+
SiteMaps.use(:file_system, **config) do
|
365
|
+
process do |s|
|
366
|
+
s.add(
|
367
|
+
'/',
|
368
|
+
priority: 1.0,
|
369
|
+
changefreq: "daily",
|
370
|
+
alternates: [
|
371
|
+
{ href: "https://example.com/en", lang: "en" },
|
372
|
+
{ href: "https://example.com/es", lang: "es" },
|
373
|
+
],
|
374
|
+
)
|
375
|
+
end
|
376
|
+
end
|
377
|
+
```
|
378
|
+
|
379
|
+
Supported attributes:
|
380
|
+
* `href` - URL of the alternate link. (Required)
|
381
|
+
* `lang` - Language of the alternate link. (Optional)
|
382
|
+
* `nofollow` - Nofollow attribute of the alternate link. (Optional)
|
383
|
+
* `media` - Media targets for responsive design pages. (Optional)
|
384
|
+
|
385
|
+
#### Mobile
|
386
|
+
|
387
|
+
Mobile sitemaps include a specific `<mobile:mobile/>` tag.
|
388
|
+
|
389
|
+
Check out the Google specification [here](http://support.google.com/webmasters/bin/answer.py?hl=en&answer=34648).
|
390
|
+
|
391
|
+
```ruby
|
392
|
+
config = { ... }
|
393
|
+
SiteMaps.use(:file_system, **config) do
|
394
|
+
process do |s|
|
395
|
+
s.add('/', mobile: true)
|
396
|
+
end
|
397
|
+
end
|
398
|
+
```
|
399
|
+
|
400
|
+
Supported attributes:
|
401
|
+
|
402
|
+
* `mobile` - Mobile attribute of the sitemap link.
|
95
403
|
|
404
|
+
## Adapters
|
405
|
+
|
406
|
+
The gem provides adapters to store the sitemaps in different locations. The following adapters are available:
|
407
|
+
|
408
|
+
* File System
|
409
|
+
* AWS S3
|
410
|
+
|
411
|
+
### File System
|
412
|
+
|
413
|
+
You can use the file system adapter to store the sitemaps in the file system. The configuration is simple, you just need to provide the directory where the sitemaps will be stored.
|
414
|
+
|
415
|
+
```ruby
|
416
|
+
|
417
|
+
SiteMaps.use(:file_system) do
|
96
418
|
configure do |config|
|
97
|
-
config.url = "https://example.com/sitemaps/sitemap.xml.gz"
|
419
|
+
config.url = "https://example.com/sitemaps/sitemap.xml.gz"
|
98
420
|
config.directory = "/home/www/public"
|
99
421
|
end
|
100
422
|
process do |s|
|
101
|
-
|
102
|
-
s.add(about_path, priority: 0.9, changefreq: "weekly")
|
423
|
+
# Add sitemap links
|
103
424
|
end
|
104
425
|
end
|
105
426
|
```
|
@@ -109,23 +430,107 @@ end
|
|
109
430
|
You can use the AWS S3 adapter to store the sitemaps in an S3 bucket. The configuration is similar to the file system adapter, but you need to provide the AWS SDK options.
|
110
431
|
|
111
432
|
```ruby
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
433
|
+
SiteMaps.use(:aws_sdk) do
|
434
|
+
configure do |config|
|
435
|
+
config.url = "https://my-bucket.s3.amazonaws.com/sitemaps/sitemap.xml"
|
436
|
+
config.directory = "/tmp" # Local directory to store the sitemaps before uploading to S3
|
437
|
+
# AWS S3 specific options
|
438
|
+
config.bucket = "my-bucket"
|
439
|
+
config.region = "us-east-1"
|
440
|
+
config.aws_access_key = ENV["AWS_ACCESS_KEY_ID"]
|
441
|
+
config.aws_secret_key = ENV["AWS_SECRET_ACCESS_KEY"]
|
442
|
+
# Optional parameters (default values)
|
443
|
+
config.acl = "public-read"
|
444
|
+
config.cache_control = "private, max-age=0, no-cache"
|
445
|
+
end
|
446
|
+
process do |s|
|
447
|
+
# Add sitemap links
|
448
|
+
end
|
449
|
+
end
|
450
|
+
```
|
451
|
+
|
452
|
+
If you want your Rails application to act as a proxy for the sitemap files, you can create a controller to serve the sitemap files from the S3 bucket.
|
453
|
+
|
454
|
+
```ruby
|
455
|
+
# config/routes.rb
|
456
|
+
get "sitemaps/*relative_path", to: "sitemaps#show", as: :sitemap
|
457
|
+
```
|
458
|
+
|
459
|
+
```ruby
|
460
|
+
# app/controllers/sitemaps_controller.rb
|
461
|
+
class SitemapsController < ApplicationController
|
462
|
+
def show
|
463
|
+
location = params.permit("relative_path", "format").to_h.values.join(".")
|
464
|
+
|
465
|
+
unless location =~ /\.xml(\.gz)?$/ # You may want add more validations here
|
466
|
+
raise ActionController::RoutingError, "Not found"
|
467
|
+
end
|
468
|
+
|
469
|
+
data, meta = SiteMaps.current_adapter.read(File.join("sitemaps", location))
|
470
|
+
if location.ends_with?(".xml")
|
471
|
+
render xml: data
|
472
|
+
else
|
473
|
+
send_data(data, disposition: "attachment", type: meta[:content_type])
|
474
|
+
end
|
475
|
+
rescue SiteMaps::FileNotFoundError
|
476
|
+
raise ActionController::RoutingError, "Not found"
|
477
|
+
end
|
478
|
+
end
|
479
|
+
```
|
480
|
+
|
481
|
+
Make sure to keep the sitemap configuration in an initializer. You may want to add some caching to avoid hitting the S3 bucket on every request.
|
482
|
+
|
483
|
+
|
484
|
+
### Custom Adapters
|
485
|
+
|
486
|
+
You can create custom adapters to store the sitemaps in different locations. You just need to create a class that implements the `SiteMaps::Adapters::Adapter` interface. The adapter should implement the following methods:
|
487
|
+
|
488
|
+
* `write(url, raw_data, **extra)` - Write the sitemap data to the storage.
|
489
|
+
* `read(url)` - Read the sitemap data from the storage.
|
490
|
+
* `delete(url)` - Delete the sitemap data from the storage.
|
491
|
+
|
492
|
+
```ruby
|
493
|
+
class MyAdapter < SiteMaps::Adapters::Adapter
|
494
|
+
def write(url, raw_data, **extra)
|
495
|
+
# Write the sitemap data to the storage
|
496
|
+
end
|
497
|
+
|
498
|
+
def read(url)
|
499
|
+
# Read the sitemap data from the storage
|
500
|
+
end
|
121
501
|
|
122
|
-
|
502
|
+
def delete(url)
|
503
|
+
# Delete the sitemap data from the storage
|
504
|
+
end
|
505
|
+
end
|
506
|
+
|
507
|
+
SiteMaps.use(MyAdapter, **config) do
|
508
|
+
process do |s|
|
509
|
+
# Add sitemap links
|
510
|
+
end
|
511
|
+
end
|
512
|
+
```
|
513
|
+
|
514
|
+
#### Adapter Configuration
|
515
|
+
|
516
|
+
If your adapter requires additional configuration, you can define a `<adapter class>::Config` class inheriting from `SiteMaps::Configuration` and implement the required configuration options.
|
517
|
+
|
518
|
+
```ruby
|
519
|
+
class MyAdapter::Config < SiteMaps::Configuration
|
520
|
+
attribute :api_key, String
|
521
|
+
end
|
522
|
+
```
|
523
|
+
|
524
|
+
During the adapter initialization, it will automatically detect the configuration class and use it to load the configuration options.
|
525
|
+
|
526
|
+
```ruby
|
527
|
+
SiteMaps.use(MyAdapter) do
|
123
528
|
configure do |config|
|
124
|
-
|
529
|
+
# ...
|
530
|
+
config.api_key = "my-api-key"
|
125
531
|
end
|
126
532
|
process do |s|
|
127
|
-
|
128
|
-
s.add('/about', priority: 0.9, changefreq: "weekly")
|
533
|
+
# Add sitemap links
|
129
534
|
end
|
130
535
|
end
|
131
536
|
```
|
@@ -141,22 +546,41 @@ bundle exec site_maps generate --config-file config/sitemap.rb
|
|
141
546
|
To enqueue dynamic processes, you can pass the process name with the context values.
|
142
547
|
|
143
548
|
```bash
|
144
|
-
bundle exec site_maps generate monthly_posts
|
549
|
+
bundle exec site_maps generate monthly_posts \
|
550
|
+
--config-file config/sitemap.rb \
|
551
|
+
--context=year:2021 month:1
|
552
|
+
```
|
553
|
+
|
554
|
+
Enqueue dynamic + remaining processes
|
555
|
+
|
556
|
+
```bash
|
557
|
+
bundle exec site_maps generate monthly_posts \
|
558
|
+
--config-file config/sitemap.rb \
|
559
|
+
--context=year:2021 month:1 \
|
560
|
+
--enqueue-remaining
|
561
|
+
```
|
562
|
+
|
563
|
+
Passing max threads to run the processes in parallel:
|
564
|
+
|
565
|
+
```bash
|
566
|
+
bundle exec site_maps generate \
|
567
|
+
--config-file config/sitemap.rb \
|
568
|
+
--max-threads 10
|
145
569
|
```
|
146
570
|
|
147
571
|
## Notification
|
148
572
|
|
149
573
|
You can subscribe to the internal events to receive notifications about the sitemap generation. The following events are available:
|
150
574
|
|
151
|
-
* `sitemaps.
|
152
|
-
* `sitemaps.
|
153
|
-
* `sitemaps.
|
154
|
-
* `sitemaps.
|
575
|
+
* `sitemaps.enqueue_process` - Triggered when a process is enqueued.
|
576
|
+
* `sitemaps.before_process_execution` - Triggered before a process starts execution.
|
577
|
+
* `sitemaps.process_execution` - Triggered when a process finishes execution.
|
578
|
+
* `sitemaps.finalize_urlset` - Triggered when the sitemap builder finishes the URL set.
|
155
579
|
|
156
580
|
You can subscribe to the events using the following code:
|
157
581
|
|
158
582
|
```ruby
|
159
|
-
SiteMaps::Notification.subscribe("sitemaps.
|
583
|
+
SiteMaps::Notification.subscribe("sitemaps.enqueue_process") do |event|
|
160
584
|
puts "Enqueueing process #{event.payload[:name]}"
|
161
585
|
end
|
162
586
|
```
|
@@ -170,6 +594,43 @@ SiteMaps.generate(config_file: "config/sitemap.rb")
|
|
170
594
|
.run
|
171
595
|
```
|
172
596
|
|
597
|
+
## Mixins
|
598
|
+
|
599
|
+
You can use mixins to extend the sitemap builder with additional methods. The mixins can be used to define common methods that will be used in multiple processes. Make sure they are thread-safe; otherwise, it is recommended to define them in the process block.
|
600
|
+
|
601
|
+
```ruby
|
602
|
+
module MyMixin
|
603
|
+
def repository
|
604
|
+
Repository.new
|
605
|
+
end
|
606
|
+
|
607
|
+
def post_path(post)
|
608
|
+
"/posts/#{post.slug}"
|
609
|
+
end
|
610
|
+
end
|
611
|
+
|
612
|
+
SiteMaps.use(:file_system) do
|
613
|
+
include_module(MyMixin)
|
614
|
+
process do |s|
|
615
|
+
repository.posts.each do |post|
|
616
|
+
s.add(post_path(post), priority: 0.8)
|
617
|
+
end
|
618
|
+
end
|
619
|
+
end
|
620
|
+
```
|
621
|
+
|
622
|
+
We already have a built-in mixin for Rails applications that provides the url helpers through the `route` method.
|
623
|
+
|
624
|
+
```ruby
|
625
|
+
SiteMaps.use(:file_system) do
|
626
|
+
include_module(SiteMaps::Mixins::Rails)
|
627
|
+
process do |s|
|
628
|
+
s.add(route.root_path, priority: 1.0)
|
629
|
+
s.add(route.about_path, priority: 0.9)
|
630
|
+
end
|
631
|
+
end
|
632
|
+
```
|
633
|
+
|
173
634
|
## Development
|
174
635
|
|
175
636
|
After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -28,9 +28,9 @@ module SiteMaps
|
|
28
28
|
|
29
29
|
include Publisher
|
30
30
|
|
31
|
-
register_event "sitemaps.
|
32
|
-
register_event "sitemaps.
|
33
|
-
register_event "sitemaps.
|
34
|
-
register_event "sitemaps.
|
31
|
+
register_event "sitemaps.finalize_urlset"
|
32
|
+
register_event "sitemaps.before_process_execution"
|
33
|
+
register_event "sitemaps.enqueue_process"
|
34
|
+
register_event "sitemaps.process_execution"
|
35
35
|
end
|
36
36
|
end
|
data/lib/site_maps/process.rb
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require "securerandom"
|
4
|
+
|
3
5
|
module SiteMaps
|
4
6
|
Process = Concurrent::ImmutableStruct.new(:name, :location_template, :kwargs_template, :block) do
|
7
|
+
def id
|
8
|
+
@id ||= SecureRandom.hex(4)
|
9
|
+
end
|
10
|
+
|
5
11
|
def location(**kwargs)
|
6
12
|
return unless location_template
|
7
13
|
|
@@ -13,54 +13,58 @@ module SiteMaps
|
|
13
13
|
method(:"on_#{method_name}")
|
14
14
|
end
|
15
15
|
|
16
|
-
def
|
16
|
+
def on_sitemaps_enqueue_process(event)
|
17
17
|
process = event[:process]
|
18
18
|
kwargs = event[:kwargs]
|
19
19
|
location = process.location(**kwargs)
|
20
20
|
print_message(
|
21
|
-
"Enqueue process %<name>s#{" at %<location>s" if location}",
|
21
|
+
"[%<id>s] Enqueue process %<name>s#{" at %<location>s" if location}",
|
22
|
+
id: process.id,
|
22
23
|
name: colorize(process.name, :bold),
|
23
24
|
location: colorize(location, :lightgray)
|
24
25
|
)
|
25
26
|
if kwargs.any?
|
26
|
-
print_message("
|
27
|
+
print_message(" └──── Context: {%<kwargs>s}", kwargs: kwargs.map { |k, v| "#{k}: #{v.inspect}" }.join(", "))
|
27
28
|
end
|
28
29
|
end
|
29
30
|
|
30
|
-
def
|
31
|
+
def on_sitemaps_before_process_execution(event)
|
31
32
|
process = event[:process]
|
32
33
|
kwargs = event[:kwargs]
|
33
34
|
location = process.location(**kwargs)
|
34
35
|
print_message(
|
35
|
-
"Executing process %<name>s#{" at %<location>s" if location}",
|
36
|
+
"[%<id>s] Executing process %<name>s#{" at %<location>s" if location}",
|
37
|
+
id: process.id,
|
36
38
|
name: colorize(process.name, :bold),
|
37
39
|
location: colorize(location, :lightgray)
|
38
40
|
)
|
39
41
|
if kwargs.any?
|
40
|
-
print_message("
|
42
|
+
print_message(" └──── Context: {%<kwargs>s}", kwargs: kwargs.map { |k, v| "#{k}: #{v.inspect}" }.join(", "))
|
41
43
|
end
|
42
44
|
end
|
43
45
|
|
44
|
-
def
|
46
|
+
def on_sitemaps_process_execution(event)
|
45
47
|
process = event[:process]
|
46
48
|
kwargs = event[:kwargs]
|
47
49
|
location = process.location(**kwargs)
|
48
50
|
print_message(
|
49
|
-
"[%<runtime>s] Executed process %<name>s#{" at %<location>s" if location}",
|
51
|
+
"[%<id>s][%<runtime>s] Executed process %<name>s#{" at %<location>s" if location}",
|
52
|
+
id: process.id,
|
50
53
|
name: colorize(process.name, :bold),
|
51
54
|
location: colorize(location, :lightgray),
|
52
55
|
runtime: formatted_runtime(event[:runtime])
|
53
56
|
)
|
54
57
|
if kwargs.any?
|
55
|
-
print_message("
|
58
|
+
print_message(" └──── Context: {%<kwargs>s}", kwargs: kwargs.map { |k, v| "#{k}: #{v.inspect}" }.join(", "))
|
56
59
|
end
|
57
60
|
end
|
58
61
|
|
59
|
-
def
|
62
|
+
def on_sitemaps_finalize_urlset(event)
|
63
|
+
process = event[:process]
|
60
64
|
links_count = event[:links_count]
|
61
65
|
news_count = event[:news_count]
|
62
66
|
url = event[:url]
|
63
|
-
text = +"[%<runtime>s] Finalize URLSet with "
|
67
|
+
text = +"[%<id>s][%<runtime>s] Finalize URLSet with "
|
64
68
|
text << "%<links>d links" if links_count > 0
|
65
69
|
text << " and " if links_count > 0 && news_count > 0
|
66
70
|
text << "%<news>d news" if news_count > 0
|
@@ -68,6 +72,7 @@ module SiteMaps
|
|
68
72
|
|
69
73
|
print_message(
|
70
74
|
text,
|
75
|
+
id: process.id,
|
71
76
|
links: links_count,
|
72
77
|
news: news_count,
|
73
78
|
url: colorize(url, :lightgray),
|
data/lib/site_maps/runner.rb
CHANGED
@@ -18,7 +18,7 @@ module SiteMaps
|
|
18
18
|
raise ArgumentError, "Process :#{process_name} not found"
|
19
19
|
end
|
20
20
|
kwargs = process.keyword_arguments(kwargs)
|
21
|
-
SiteMaps::Notification.instrument("sitemaps.
|
21
|
+
SiteMaps::Notification.instrument("sitemaps.enqueue_process") do |payload|
|
22
22
|
payload[:process] = process
|
23
23
|
payload[:kwargs] = kwargs
|
24
24
|
if process.dynamic?
|
@@ -53,15 +53,16 @@ module SiteMaps
|
|
53
53
|
futures = []
|
54
54
|
@execution.each do |_process_name, items|
|
55
55
|
items.each do |process, kwargs|
|
56
|
-
SiteMaps::Notification.publish("sitemaps.
|
56
|
+
SiteMaps::Notification.publish("sitemaps.before_process_execution", process: process, kwargs: kwargs)
|
57
57
|
futures << Concurrent::Future.execute(executor: pool) do
|
58
58
|
wrap_process_execution(process) do
|
59
|
-
SiteMaps::Notification.instrument("sitemaps.
|
59
|
+
SiteMaps::Notification.instrument("sitemaps.process_execution") do |payload|
|
60
60
|
payload[:process] = process
|
61
61
|
payload[:kwargs] = kwargs
|
62
62
|
builder = SiteMaps::SitemapBuilder.new(
|
63
63
|
adapter: adapter,
|
64
|
-
location: process.location(**kwargs)
|
64
|
+
location: process.location(**kwargs),
|
65
|
+
notification_payload: { process: process }
|
65
66
|
)
|
66
67
|
process.call(builder, **kwargs)
|
67
68
|
builder.finalize!
|
@@ -4,11 +4,12 @@ module SiteMaps
|
|
4
4
|
class SitemapBuilder
|
5
5
|
extend Forwardable
|
6
6
|
|
7
|
-
def initialize(adapter:, location: nil)
|
7
|
+
def initialize(adapter:, location: nil, notification_payload: {})
|
8
8
|
@adapter = adapter
|
9
9
|
@url_set = SiteMaps::Builder::URLSet.new
|
10
10
|
@location = location
|
11
11
|
@mutex = Mutex.new
|
12
|
+
@notification_payload = notification_payload
|
12
13
|
end
|
13
14
|
|
14
15
|
def add(path, params: nil, **options)
|
@@ -31,7 +32,7 @@ module SiteMaps
|
|
31
32
|
|
32
33
|
raw_data = url_set.finalize!
|
33
34
|
|
34
|
-
SiteMaps::Notification.instrument("sitemaps.
|
35
|
+
SiteMaps::Notification.instrument("sitemaps.finalize_urlset", notification_payload) do |payload|
|
35
36
|
payload[:links_count] = url_set.links_count
|
36
37
|
payload[:news_count] = url_set.news_count
|
37
38
|
payload[:last_modified] = url_set.last_modified
|
@@ -50,13 +51,13 @@ module SiteMaps
|
|
50
51
|
|
51
52
|
protected
|
52
53
|
|
53
|
-
attr_reader :url_set, :adapter, :location
|
54
|
+
attr_reader :url_set, :adapter, :location, :notification_payload
|
54
55
|
|
55
56
|
def_delegators :adapter, :sitemap_index, :config, :repo
|
56
57
|
|
57
58
|
def finalize_and_start_next_urlset!
|
58
59
|
raw_data = url_set.finalize!
|
59
|
-
SiteMaps::Notification.instrument("sitemaps.
|
60
|
+
SiteMaps::Notification.instrument("sitemaps.finalize_urlset", notification_payload) do |payload|
|
60
61
|
sitemap_url = repo.generate_url(location)
|
61
62
|
payload[:url] = sitemap_url
|
62
63
|
payload[:links_count] = url_set.links_count
|
@@ -5,8 +5,8 @@ require "open-uri"
|
|
5
5
|
module SiteMaps
|
6
6
|
class SitemapReader
|
7
7
|
Error = Class.new(SiteMaps::Error)
|
8
|
-
|
9
|
-
|
8
|
+
FileNotFoundError = Class.new(Error)
|
9
|
+
MalformedFileError = Class.new(Error)
|
10
10
|
|
11
11
|
def initialize(location)
|
12
12
|
@location = Pathname.new(location)
|
@@ -19,7 +19,7 @@ module SiteMaps
|
|
19
19
|
read_file.read
|
20
20
|
end
|
21
21
|
rescue Zlib::GzipFile::Error => _e
|
22
|
-
raise
|
22
|
+
raise MalformedFileError.new("The file #{@location} is not a valid Gzip file")
|
23
23
|
end
|
24
24
|
|
25
25
|
def to_doc
|
@@ -40,9 +40,9 @@ module SiteMaps
|
|
40
40
|
::File.open(@location, "r")
|
41
41
|
end
|
42
42
|
rescue Errno::ENOENT
|
43
|
-
raise
|
43
|
+
raise FileNotFoundError.new("The file #{@location} does not exist")
|
44
44
|
rescue OpenURI::HTTPError
|
45
|
-
raise
|
45
|
+
raise FileNotFoundError.new("The file #{@location} could not be opened")
|
46
46
|
end
|
47
47
|
|
48
48
|
def compressed?
|
data/lib/site_maps/version.rb
CHANGED
data/site_maps.gemspec
CHANGED
@@ -9,10 +9,10 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.email = ["mgzmaster@gmail.com"]
|
10
10
|
|
11
11
|
spec.summary = <<~SUMMARY
|
12
|
-
|
12
|
+
Concurrent and Incremental sitemap.xml builder for ruby applications
|
13
13
|
SUMMARY
|
14
14
|
spec.description = <<~DESCRIPTION
|
15
|
-
SiteMaps is
|
15
|
+
SiteMaps is a framework-agnostic library for building sitemap.xml files in a concurrent and incremental way.
|
16
16
|
DESCRIPTION
|
17
17
|
|
18
18
|
spec.homepage = "https://github.com/marcosgz/site_maps"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: site_maps
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1.
|
4
|
+
version: 0.0.1.beta3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Marcos G. Zimmermann
|
8
8
|
autorequire:
|
9
9
|
bindir: exec
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-11-
|
11
|
+
date: 2024-11-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|
@@ -80,8 +80,8 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: 0.0.0
|
83
|
-
description: 'SiteMaps is
|
84
|
-
|
83
|
+
description: 'SiteMaps is a framework-agnostic library for building sitemap.xml files
|
84
|
+
in a concurrent and incremental way.
|
85
85
|
|
86
86
|
'
|
87
87
|
email:
|
@@ -168,5 +168,5 @@ requirements: []
|
|
168
168
|
rubygems_version: 3.5.21
|
169
169
|
signing_key:
|
170
170
|
specification_version: 4
|
171
|
-
summary:
|
171
|
+
summary: Concurrent and Incremental sitemap.xml builder for ruby applications
|
172
172
|
test_files: []
|