site_maps 0.0.1.beta3 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/main.yml +2 -4
  3. data/.rubocop.yml +4 -2
  4. data/.tool-versions +1 -1
  5. data/AGENTS.md +73 -0
  6. data/CHANGELOG.md +5 -0
  7. data/CLAUDE.md +77 -0
  8. data/Gemfile +1 -0
  9. data/Gemfile.lock +72 -56
  10. data/README.md +531 -393
  11. data/docs/README.md +67 -0
  12. data/docs/adapters.md +143 -0
  13. data/docs/api.md +154 -0
  14. data/docs/cli.md +93 -0
  15. data/docs/events.md +79 -0
  16. data/docs/extensions.md +141 -0
  17. data/docs/getting-started.md +138 -0
  18. data/docs/middleware.md +85 -0
  19. data/docs/processes.md +156 -0
  20. data/docs/rails.md +128 -0
  21. data/lib/site_maps/adapters/adapter.rb +35 -5
  22. data/lib/site_maps/adapters/aws_sdk/storage.rb +5 -2
  23. data/lib/site_maps/builder/sitemap_index/item.rb +1 -1
  24. data/lib/site_maps/builder/sitemap_index.rb +29 -5
  25. data/lib/site_maps/builder/url.rb +13 -10
  26. data/lib/site_maps/builder/url_set.rb +17 -7
  27. data/lib/site_maps/builder/xsl_stylesheet.rb +192 -0
  28. data/lib/site_maps/cli.rb +6 -2
  29. data/lib/site_maps/configuration.rb +8 -1
  30. data/lib/site_maps/incremental_location.rb +1 -1
  31. data/lib/site_maps/middleware.rb +197 -0
  32. data/lib/site_maps/notification/event.rb +1 -1
  33. data/lib/site_maps/notification/publisher.rb +1 -0
  34. data/lib/site_maps/notification.rb +1 -0
  35. data/lib/site_maps/ping.rb +35 -0
  36. data/lib/site_maps/{primitives → primitive}/array.rb +1 -1
  37. data/lib/site_maps/{primitives → primitive}/output.rb +1 -1
  38. data/lib/site_maps/primitive/string.rb +106 -0
  39. data/lib/site_maps/robots_txt.rb +21 -0
  40. data/lib/site_maps/runner/event_listener.rb +2 -2
  41. data/lib/site_maps/runner.rb +17 -3
  42. data/lib/site_maps/sitemap_builder.rb +16 -4
  43. data/lib/site_maps/sitemap_reader.rb +3 -0
  44. data/lib/site_maps/version.rb +1 -1
  45. data/lib/site_maps.rb +81 -10
  46. data/site_maps.gemspec +1 -1
  47. metadata +23 -10
  48. data/lib/site_maps/primitives/string.rb +0 -43
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d0b92468df93a09f176223cd329a97253051d3beece1ebd3c33efe2a8a23b109
4
- data.tar.gz: 6d47acbaa8f176cd931929100a26ea1c3184005c6a127dc69bea41fa5ff6194f
3
+ metadata.gz: 9246737e11d28f750005fedc117c0c2afb8c5f5ef606218fc505e1042c4da002
4
+ data.tar.gz: 80f005cca72244cfe1044c30cf91d1656b22bb7ff3047a2fd159eee2ddd2f3a5
5
5
  SHA512:
6
- metadata.gz: 58bf88572f4e54bbdf784d33146908198d6fccb4558573946f84ee566a1b2fcb600eada66d81e0c2bc6b4529cee9eb7e631559f1ee863756d8ed561f13beb818
7
- data.tar.gz: 4c432015c4a7e2463a22f47742ad779efe2fbcfa7577c408ad50681855bc2b6d8697cde7bc04acb9f8c2113d6f8d41c0571e3c40de53513cf6aa3275ba60a635
6
+ metadata.gz: 6ebb8a69803018a65c995e61cc714385ae965b04fc8df330cb7a5e99fd37539568cc1661e86da70f1e7e90eef108ba9a9c5ba1bc6e4b50d5c17f41856e4df934
7
+ data.tar.gz: 9dbf41444903708643f4639ae773470fc2043eeb0deb9061dedccef5ceda30210435db6faec1d1f949baa8200841d7245c40ac97df88f8993b57e5c1caeaa41b
data/.github/workflows/main.yml CHANGED
@@ -14,7 +14,7 @@ jobs:
14
14
  - uses: actions/checkout@v4
15
15
  - uses: ruby/setup-ruby@v1
16
16
  with:
17
- ruby-version: "3.0"
17
+ ruby-version: "3.2"
18
18
  bundler-cache: true
19
19
  - name: Run rubocop
20
20
  run: |
@@ -26,10 +26,9 @@ jobs:
26
26
  fail-fast: false
27
27
  matrix:
28
28
  ruby-version:
29
- - "3.0"
30
- - "3.1"
31
29
  - "3.2"
32
30
  - "3.3"
31
+ - "3.4"
33
32
  steps:
34
33
  - name: Checkout code
35
34
  uses: actions/checkout@v4
@@ -42,4 +41,3 @@ jobs:
42
41
  run: bundle install --jobs 4 --retry 3
43
42
  - name: Run tests
44
43
  run: bundle exec rspec
45
-
data/.rubocop.yml CHANGED
@@ -2,16 +2,18 @@ inherit_mode:
2
2
  merge:
3
3
  - Exclude
4
4
 
5
- require:
5
+ plugins:
6
6
  - rubocop-performance
7
7
  - rubocop-rspec
8
+
9
+ require:
8
10
  - standard/cop/block_single_line_braces
9
11
 
10
12
  inherit_gem:
11
13
  standard: config/base.yml
12
14
 
13
15
  AllCops:
14
- TargetRubyVersion: 2.5
16
+ TargetRubyVersion: 3.2
15
17
  SuggestExtensions: false
16
18
  Exclude:
17
19
  - "db/**/*"
data/.tool-versions CHANGED
@@ -1 +1 @@
1
- ruby 3.3.5
1
+ ruby 3.4.9
data/AGENTS.md ADDED
@@ -0,0 +1,73 @@
1
+ # AGENTS.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Commands
6
+
7
+ ```bash
8
+ bundle exec rspec # Run all tests
9
+ bundle exec rspec spec/path/to_spec.rb # Run a single spec file
10
+ bundle exec rspec spec/path/to_spec.rb:42 # Run a single example by line
11
+ bundle exec rubocop # Run linter
12
+ bundle exec rubocop -a # Auto-correct offenses
13
+ bundle exec rake install # Install gem locally
14
+ ```
15
+
16
+ ## Architecture
17
+
18
+ `SiteMaps` is a concurrent, incremental sitemap.xml generator gem. It is framework-agnostic but ships with a Rails Railtie.
19
+
20
+ ### Adapter Pattern
21
+
22
+ Storage backends (`FileSystem`, `AwsSdk`, `Noop`) all inherit from `Adapters::Adapter`. Each adapter can ship its own `Config` class (inheriting `SiteMaps::Configuration`) and optional process mixins. The active adapter is stored in `SiteMaps.current_adapter` and set via `SiteMaps.use(:adapter_name, **opts)`.
23
+
24
+ ### Process Model
25
+
26
+ Work is declared as named **processes**. A process can be **static** (executed once, fixed location) or **dynamic** (executed multiple times with a location template like `posts/%{year}-%{month}/sitemap.xml`). Processes are stored as immutable structs with block callbacks and orchestrated by `Runner`.
27
+
28
+ ### Runner and Concurrency
29
+
30
+ `Runner` manages a `concurrent-ruby` `FixedThreadPool` (default 4 threads). Processes are enqueued with `enqueue(:name, **args)`, `enqueue_remaining`, or `enqueue_all`, then executed by calling `run`. `AtomicRepository` provides thread-safe tracking of processes and URL sets. `SitemapBuilder` uses a `Mutex` for synchronization.
31
+
32
+ ### XML Generation
33
+
34
+ `Builder::URLSet` writes a single sitemap XML file (max 50,000 links or ~50MB). When a limit is reached, `IncrementalLocation` generates the next filename automatically. `Builder::SitemapIndex` aggregates multiple URLSets. `Builder::URL` handles sitemap extensions: images, videos, news, alternates, mobile, and pagemap.
35
+
36
+ ### Notification System
37
+
38
+ `Notification::Publisher` is a mixin that enables event-driven hooks. Key events: `sitemaps.enqueue_process`, `sitemaps.before_process_execution`, `sitemaps.process_execution`, `sitemaps.finalize_urlset`. Subscribe via `SiteMaps.subscribe(event) { |payload| ... }`.
39
+
40
+ ### Configuration
41
+
42
+ `Configuration` uses an `attribute` macro for declaring settings with defaults. Adapter-specific configs inherit from the base class. Config is set with `SiteMaps.configure { |c| ... }` or inline inside `SiteMaps.use`.
43
+
44
+ ### CLI
45
+
46
+ Implemented with Thor in `lib/site_maps/cli.rb`, exposed via the `exec/site_maps` binary. The main command is `site_maps generate [processes]` with flags `--config-file`, `--max-threads`, `--context` (key:value pairs for dynamic processes), `--enqueue-remaining`, `--debug`, and `--logfile`.
47
+
48
+ ### Rails Integration
49
+
50
+ `lib/site_maps/railtie.rb` is auto-loaded when Rails is present. It injects Rails URL helpers into process blocks via a `route` helper method.
51
+
52
+ ## Typical Usage Pattern
53
+
54
+ ```ruby
55
+ # config/sitemap.rb
56
+ SiteMaps.use(:file_system) do
57
+ configure do |config|
58
+ config.url = "https://example.com/sitemaps/sitemap.xml.gz"
59
+ config.directory = Rails.public_path.to_s
60
+ end
61
+
62
+ process do |s| # static, default process
63
+ s.add("/", priority: 1.0)
64
+ end
65
+
66
+ process :posts, "posts/%{year}-%{month}/sitemap.xml", year: 2024, month: 1 do |s, year:, month:|
67
+ Post.where(year: year, month: month).find_each { |p| s.add(post_path(p)) }
68
+ end
69
+ end
70
+
71
+ # Generate
72
+ SiteMaps.generate(config_file: "config/sitemap.rb").enqueue_all.run
73
+ ```
data/CHANGELOG.md CHANGED
@@ -4,5 +4,10 @@ All notable changes to this project will be documented in this file.
4
4
 
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
6
 
7
+ ## 0.1.1 - 2026-05-12
8
+
9
+ ### Fixed
10
+ - AwsSdk adapter: switched from the deprecated `Aws::S3::Object#upload_file` to `Aws::S3::TransferManager#upload_file` to silence the deprecation warning and keep working past the next aws-sdk-s3 major.
11
+
7
12
  ## 0.0.1.beta1 - 2024-11-07
8
13
  The first release of the gem
data/CLAUDE.md ADDED
@@ -0,0 +1,77 @@
1
+ # AGENTS.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Commands
6
+
7
+ ```bash
8
+ bundle exec rspec # Run all tests
9
+ bundle exec rspec spec/path/to_spec.rb # Run a single spec file
10
+ bundle exec rspec spec/path/to_spec.rb:42 # Run a single example by line
11
+ bundle exec rubocop # Run linter
12
+ bundle exec rubocop -a # Auto-correct offenses
13
+ bundle exec rake install # Install gem locally
14
+ ```
15
+
16
+ ## Architecture
17
+
18
+ `SiteMaps` is a concurrent, incremental sitemap.xml generator gem. It is framework-agnostic but ships with a Rails Railtie.
19
+
20
+ ### Adapter Pattern
21
+
22
+ Storage backends (`FileSystem`, `AwsSdk`, `Noop`) all inherit from `Adapters::Adapter`. Each adapter can ship its own `Config` class (inheriting `SiteMaps::Configuration`) and optional process mixins. The active adapter is stored in `SiteMaps.current_adapter` and set via `SiteMaps.use(:adapter_name, **opts)`.
23
+
24
+ ### Process Model
25
+
26
+ Work is declared as named **processes**. A process can be **static** (executed once, fixed location) or **dynamic** (executed multiple times with a location template like `posts/%{year}-%{month}/sitemap.xml`). Processes are stored as immutable structs with block callbacks and orchestrated by `Runner`.
27
+
28
+ ### Runner and Concurrency
29
+
30
+ `Runner` manages a `concurrent-ruby` `FixedThreadPool` (default 4 threads). Processes are enqueued with `enqueue(:name, **args)`, `enqueue_remaining`, or `enqueue_all`, then executed by calling `run`. `AtomicRepository` provides thread-safe tracking of processes and URL sets. `SitemapBuilder` uses a `Mutex` for synchronization.
31
+
32
+ ### XML Generation
33
+
34
+ `Builder::URLSet` writes a single sitemap XML file (max 50,000 links or ~50MB). When a limit is reached, `IncrementalLocation` generates the next filename automatically. `Builder::SitemapIndex` aggregates multiple URLSets. `Builder::URL` handles sitemap extensions: images, videos, news, alternates, mobile, and pagemap.
35
+
36
+ ### Notification System
37
+
38
+ `Notification::Publisher` is a mixin that enables event-driven hooks. Key events: `sitemaps.enqueue_process`, `sitemaps.before_process_execution`, `sitemaps.process_execution`, `sitemaps.finalize_urlset`. Subscribe via `SiteMaps.subscribe(event) { |payload| ... }`.
39
+
40
+ ### Configuration
41
+
42
+ `Configuration` uses an `attribute` macro for declaring settings with defaults. Adapter-specific configs inherit from the base class. Config is set with `SiteMaps.configure { |c| ... }` or inline inside `SiteMaps.use`.
43
+
44
+ ### CLI
45
+
46
+ Implemented with Thor in `lib/site_maps/cli.rb`, exposed via the `exec/site_maps` binary. The main command is `site_maps generate [processes]` with flags `--config-file`, `--max-threads`, `--context` (key:value pairs for dynamic processes), `--enqueue-remaining`, `--ping`, `--debug`, and `--logfile`.
47
+
48
+ ### Rails Integration
49
+
50
+ `lib/site_maps/railtie.rb` is auto-loaded when Rails is present. It injects Rails URL helpers into process blocks via a `route` helper method.
51
+
52
+ ## Development Guidelines
53
+
54
+ - **Any new public-facing feature** (new option, method, CLI flag, or behaviour change) must be documented in `README.md` before the work is considered complete. This includes: new `generate`/`Runner` options, new middleware options, new CLI flags, new config attributes, and new DSL methods.
55
+
56
+ ## Typical Usage Pattern
57
+
58
+ ```ruby
59
+ # config/sitemap.rb
60
+ SiteMaps.use(:file_system) do
61
+ configure do |config|
62
+ config.url = "https://example.com/sitemaps/sitemap.xml.gz"
63
+ config.directory = Rails.public_path.to_s
64
+ end
65
+
66
+ process do |s| # static, default process
67
+ s.add("/", priority: 1.0)
68
+ end
69
+
70
+ process :posts, "posts/%{year}-%{month}/sitemap.xml", year: 2024, month: 1 do |s, year:, month:|
71
+ Post.where(year: year, month: month).find_each { |p| s.add(post_path(p)) }
72
+ end
73
+ end
74
+
75
+ # Generate
76
+ SiteMaps.generate(config_file: "config/sitemap.rb").enqueue_all.run
77
+ ```
data/Gemfile CHANGED
@@ -3,6 +3,7 @@
3
3
  source "https://rubygems.org"
4
4
 
5
5
  gem "aws-sdk-s3"
6
+ gem "base64"
6
7
  gem "dotenv"
7
8
  gem "nokogiri"
8
9
  gem "pry"
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- site_maps (0.0.1.beta3)
4
+ site_maps (0.1.1)
5
5
  builder (~> 3.0)
6
6
  concurrent-ruby (>= 1.1)
7
7
  rack (>= 2.0)
@@ -11,113 +11,129 @@ PATH
11
11
  GEM
12
12
  remote: https://rubygems.org/
13
13
  specs:
14
- addressable (2.8.7)
15
- public_suffix (>= 2.0.2, < 7.0)
16
- ast (2.4.2)
17
- aws-eventstream (1.3.0)
18
- aws-partitions (1.1003.0)
19
- aws-sdk-core (3.212.0)
14
+ addressable (2.9.0)
15
+ public_suffix (>= 2.0.2, < 8.0)
16
+ ast (2.4.3)
17
+ aws-eventstream (1.4.0)
18
+ aws-partitions (1.1238.0)
19
+ aws-sdk-core (3.244.0)
20
20
  aws-eventstream (~> 1, >= 1.3.0)
21
21
  aws-partitions (~> 1, >= 1.992.0)
22
22
  aws-sigv4 (~> 1.9)
23
+ base64
24
+ bigdecimal
23
25
  jmespath (~> 1, >= 1.6.1)
24
- aws-sdk-kms (1.95.0)
25
- aws-sdk-core (~> 3, >= 3.210.0)
26
+ logger
27
+ aws-sdk-kms (1.123.0)
28
+ aws-sdk-core (~> 3, >= 3.244.0)
26
29
  aws-sigv4 (~> 1.5)
27
- aws-sdk-s3 (1.170.0)
28
- aws-sdk-core (~> 3, >= 3.210.0)
30
+ aws-sdk-s3 (1.219.0)
31
+ aws-sdk-core (~> 3, >= 3.244.0)
29
32
  aws-sdk-kms (~> 1)
30
33
  aws-sigv4 (~> 1.5)
31
- aws-sigv4 (1.10.1)
34
+ aws-sigv4 (1.12.1)
32
35
  aws-eventstream (~> 1, >= 1.0.2)
33
- bigdecimal (3.1.8)
36
+ base64 (0.3.0)
37
+ bigdecimal (4.1.1)
34
38
  builder (3.3.0)
35
39
  coderay (1.1.3)
36
- concurrent-ruby (1.3.4)
37
- crack (1.0.0)
40
+ concurrent-ruby (1.3.6)
41
+ crack (1.0.1)
38
42
  bigdecimal
39
43
  rexml
40
- diff-lcs (1.5.1)
41
- dotenv (2.8.1)
42
- hashdiff (1.1.1)
44
+ diff-lcs (1.6.2)
45
+ dotenv (3.2.0)
46
+ hashdiff (1.2.1)
47
+ io-console (0.8.2)
43
48
  jmespath (1.6.2)
44
- json (2.8.1)
45
- language_server-protocol (3.17.0.3)
49
+ json (2.19.3)
50
+ language_server-protocol (3.17.0.5)
46
51
  lint_roller (1.1.0)
52
+ logger (1.7.0)
47
53
  method_source (1.1.0)
48
- nokogiri (1.16.7-x86_64-linux)
54
+ nokogiri (1.19.2-x86_64-linux-gnu)
49
55
  racc (~> 1.4)
50
- parallel (1.26.3)
51
- parser (3.3.6.0)
56
+ parallel (1.28.0)
57
+ parser (3.3.11.1)
52
58
  ast (~> 2.4.1)
53
59
  racc
54
- pry (0.14.2)
60
+ prism (1.9.0)
61
+ pry (0.16.0)
55
62
  coderay (~> 1.1)
56
63
  method_source (~> 1.0)
57
- public_suffix (5.1.1)
64
+ reline (>= 0.6.0)
65
+ public_suffix (7.0.5)
58
66
  racc (1.8.1)
59
- rack (3.1.8)
67
+ rack (3.2.6)
60
68
  rainbow (3.1.1)
61
- regexp_parser (2.9.2)
62
- rexml (3.3.9)
63
- rspec (3.13.0)
69
+ regexp_parser (2.12.0)
70
+ reline (0.6.3)
71
+ io-console (~> 0.5)
72
+ rexml (3.4.4)
73
+ rspec (3.13.2)
64
74
  rspec-core (~> 3.13.0)
65
75
  rspec-expectations (~> 3.13.0)
66
76
  rspec-mocks (~> 3.13.0)
67
- rspec-core (3.13.2)
77
+ rspec-core (3.13.6)
68
78
  rspec-support (~> 3.13.0)
69
- rspec-expectations (3.13.3)
79
+ rspec-expectations (3.13.5)
70
80
  diff-lcs (>= 1.2.0, < 2.0)
71
81
  rspec-support (~> 3.13.0)
72
- rspec-mocks (3.13.2)
82
+ rspec-mocks (3.13.8)
73
83
  diff-lcs (>= 1.2.0, < 2.0)
74
84
  rspec-support (~> 3.13.0)
75
- rspec-support (3.13.1)
76
- rubocop (1.64.1)
85
+ rspec-support (3.13.7)
86
+ rubocop (1.84.2)
77
87
  json (~> 2.3)
78
- language_server-protocol (>= 3.17.0)
88
+ language_server-protocol (~> 3.17.0.2)
89
+ lint_roller (~> 1.1.0)
79
90
  parallel (~> 1.10)
80
91
  parser (>= 3.3.0.2)
81
92
  rainbow (>= 2.2.2, < 4.0)
82
- regexp_parser (>= 1.8, < 3.0)
83
- rexml (>= 3.2.5, < 4.0)
84
- rubocop-ast (>= 1.31.1, < 2.0)
93
+ regexp_parser (>= 2.9.3, < 3.0)
94
+ rubocop-ast (>= 1.49.0, < 2.0)
85
95
  ruby-progressbar (~> 1.7)
86
- unicode-display_width (>= 2.4.0, < 3.0)
87
- rubocop-ast (1.34.1)
88
- parser (>= 3.3.1.0)
89
- rubocop-performance (1.21.1)
90
- rubocop (>= 1.48.1, < 2.0)
91
- rubocop-ast (>= 1.31.1, < 2.0)
92
- rubocop-rspec (3.2.0)
93
- rubocop (~> 1.61)
96
+ unicode-display_width (>= 2.4.0, < 4.0)
97
+ rubocop-ast (1.49.1)
98
+ parser (>= 3.3.7.2)
99
+ prism (~> 1.7)
100
+ rubocop-performance (1.26.1)
101
+ lint_roller (~> 1.1)
102
+ rubocop (>= 1.75.0, < 2.0)
103
+ rubocop-ast (>= 1.47.1, < 2.0)
104
+ rubocop-rspec (3.9.0)
105
+ lint_roller (~> 1.1)
106
+ rubocop (~> 1.81)
94
107
  ruby-progressbar (1.13.0)
95
- standard (1.37.0)
108
+ standard (1.54.0)
96
109
  language_server-protocol (~> 3.17.0.2)
97
110
  lint_roller (~> 1.0)
98
- rubocop (~> 1.64.0)
111
+ rubocop (~> 1.84.0)
99
112
  standard-custom (~> 1.0.0)
100
- standard-performance (~> 1.4)
113
+ standard-performance (~> 1.8)
101
114
  standard-custom (1.0.2)
102
115
  lint_roller (~> 1.0)
103
116
  rubocop (~> 1.50)
104
- standard-performance (1.4.0)
117
+ standard-performance (1.9.0)
105
118
  lint_roller (~> 1.1)
106
- rubocop-performance (~> 1.21.0)
107
- thor (1.3.2)
108
- timecop (0.9.10)
109
- unicode-display_width (2.6.0)
110
- webmock (3.24.0)
119
+ rubocop-performance (~> 1.26.0)
120
+ thor (1.5.0)
121
+ timecop (0.9.11)
122
+ unicode-display_width (3.2.0)
123
+ unicode-emoji (~> 4.1)
124
+ unicode-emoji (4.2.0)
125
+ webmock (3.26.2)
111
126
  addressable (>= 2.8.0)
112
127
  crack (>= 0.3.2)
113
128
  hashdiff (>= 0.4.0, < 2.0.0)
114
- zeitwerk (2.7.1)
129
+ zeitwerk (2.7.5)
115
130
 
116
131
  PLATFORMS
117
132
  x86_64-linux
118
133
 
119
134
  DEPENDENCIES
120
135
  aws-sdk-s3
136
+ base64
121
137
  dotenv
122
138
  nokogiri
123
139
  pry