site_maps 0.0.1.beta3 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/main.yml +2 -4
  3. data/.rubocop.yml +4 -2
  4. data/.tool-versions +1 -1
  5. data/AGENTS.md +73 -0
  6. data/CHANGELOG.md +5 -0
  7. data/CLAUDE.md +77 -0
  8. data/Gemfile +1 -0
  9. data/Gemfile.lock +72 -56
  10. data/README.md +531 -393
  11. data/docs/README.md +67 -0
  12. data/docs/adapters.md +143 -0
  13. data/docs/api.md +154 -0
  14. data/docs/cli.md +93 -0
  15. data/docs/events.md +79 -0
  16. data/docs/extensions.md +141 -0
  17. data/docs/getting-started.md +138 -0
  18. data/docs/middleware.md +85 -0
  19. data/docs/processes.md +156 -0
  20. data/docs/rails.md +128 -0
  21. data/lib/site_maps/adapters/adapter.rb +35 -5
  22. data/lib/site_maps/adapters/aws_sdk/storage.rb +5 -2
  23. data/lib/site_maps/builder/sitemap_index/item.rb +1 -1
  24. data/lib/site_maps/builder/sitemap_index.rb +29 -5
  25. data/lib/site_maps/builder/url.rb +13 -10
  26. data/lib/site_maps/builder/url_set.rb +17 -7
  27. data/lib/site_maps/builder/xsl_stylesheet.rb +192 -0
  28. data/lib/site_maps/cli.rb +6 -2
  29. data/lib/site_maps/configuration.rb +8 -1
  30. data/lib/site_maps/incremental_location.rb +1 -1
  31. data/lib/site_maps/middleware.rb +197 -0
  32. data/lib/site_maps/notification/event.rb +1 -1
  33. data/lib/site_maps/notification/publisher.rb +1 -0
  34. data/lib/site_maps/notification.rb +1 -0
  35. data/lib/site_maps/ping.rb +35 -0
  36. data/lib/site_maps/{primitives → primitive}/array.rb +1 -1
  37. data/lib/site_maps/{primitives → primitive}/output.rb +1 -1
  38. data/lib/site_maps/primitive/string.rb +106 -0
  39. data/lib/site_maps/robots_txt.rb +21 -0
  40. data/lib/site_maps/runner/event_listener.rb +2 -2
  41. data/lib/site_maps/runner.rb +17 -3
  42. data/lib/site_maps/sitemap_builder.rb +16 -4
  43. data/lib/site_maps/sitemap_reader.rb +3 -0
  44. data/lib/site_maps/version.rb +1 -1
  45. data/lib/site_maps.rb +81 -10
  46. data/site_maps.gemspec +1 -1
  47. metadata +23 -10
  48. data/lib/site_maps/primitives/string.rb +0 -43
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
+ begin
4
+ require "dry/inflector"
5
+ rescue LoadError
6
+ # noop
7
+ end
8
+
9
+ begin
10
+ require "active_support/inflector"
11
+ rescue LoadError
12
+ # noop
13
+ end
14
+
15
module SiteMaps
  module Primitive
    # A ::String subclass with a small set of inflection helpers.
    #
    # Each helper (except #underscore, which is always computed locally)
    # delegates to an inflection backend when one is loaded and otherwise
    # falls back to a deliberately simple built-in implementation.
    class String < ::String
      # Inflection backend shared by every instance, memoized on first use.
      #
      # @return [Object, nil] ActiveSupport::Inflector when that constant is
      #   defined, otherwise a new Dry::Inflector when that constant is
      #   defined, otherwise nil (built-in fallbacks are used)
      def self.inflector
        return @inflector if defined?(@inflector)

        @inflector = if defined?(::ActiveSupport::Inflector)
          ::ActiveSupport::Inflector
        elsif defined?(::Dry::Inflector)
          ::Dry::Inflector.new
        end
      end

      # Converts a path-like name into a constant-like name.
      # Fallback: each "/"-separated segment has its "_"-separated words
      # capitalized and joined, and segments are joined with "::".
      #
      # @return [SiteMaps::Primitive::String]
      def classify
        new_str = inflector&.classify(self) ||
          split("/").map { |segment| segment.split("_").map(&:capitalize).join }.join("::")

        self.class.new(new_str)
      end

      # Resolves the string to the constant it names.
      # Fallback: Object.const_get.
      #
      # @return [Object] the resolved constant
      def constantize
        inflector&.constantize(self) || Object.const_get(self)
      end

      # Converts a constant-like name into a snake_case path. This never uses
      # the inflection backend.
      #
      # @return [SiteMaps::Primitive::String]
      def underscore
        new_str = sub(/^::/, "")
          .gsub("::", "/")
          .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
          .gsub(/([a-z\d])([A-Z])/, '\1_\2')
          .tr("-", "_")
          .tr(".", "_")
          .gsub(/\s/, "_")
          .gsub(/__+/, "_")
          .downcase

        self.class.new(new_str)
      end

      # Pluralizes the string.
      # Fallback is naive: a trailing "y" becomes "ies", a trailing "s" is
      # left alone, anything else gets an "s" appended.
      #
      # @return [SiteMaps::Primitive::String]
      def pluralize
        new_str = inflector&.pluralize(self) || begin
          # dummy pluralize
          if /y$/.match?(self)
            sub(/y$/, "ies")
          elsif /s$/.match?(self)
            self
          else
            self + "s"
          end
        end

        # The fallback may hand back `self`; avoid re-wrapping in that case.
        new_str.is_a?(self.class) ? new_str : self.class.new(new_str)
      end

      # Singularizes the string.
      # Fallback is naive: a trailing "ies" becomes "y", otherwise a trailing
      # "s" is dropped, anything else is returned unchanged.
      #
      # @return [SiteMaps::Primitive::String]
      def singularize
        new_str = inflector&.singularize(self) || begin
          # dummy singularize
          if /ies$/.match?(self)
            sub(/ies$/, "y")
          elsif /s$/.match?(self)
            sub(/s$/, "")
          else
            self
          end
        end

        # The fallback may hand back `self`; avoid re-wrapping in that case.
        new_str.is_a?(self.class) ? new_str : self.class.new(new_str)
      end

      # Converts a snake_case path into CamelCase ("/" becomes "::").
      #
      # @param uppercase_first_letter [Boolean] when false, the leading run of
      #   uppercase letters is downcased (lowerCamelCase)
      # @return [SiteMaps::Primitive::String]
      def camelize(uppercase_first_letter = true)
        new_str = camelize_with_inflector(uppercase_first_letter) ||
          fallback_camelize(uppercase_first_letter)

        self.class.new(new_str)
      end

      protected

      def inflector
        self.class.inflector
      end

      private

      # Camelizes through the backend, adapting to its calling convention.
      # Returns nil when there is no backend or it cannot serve the request,
      # so the caller can use the built-in fallback.
      def camelize_with_inflector(uppercase_first_letter)
        backend = inflector
        return unless backend

        # Backends whose #camelize accepts more than the input (such as
        # ActiveSupport::Inflector) receive the flag as a second argument.
        unless backend.method(:camelize).arity == 1
          return backend.camelize(self, uppercase_first_letter)
        end

        # Single-argument backends (such as Dry::Inflector, where #camelize is
        # upper camel case only) would raise ArgumentError if given the flag.
        return backend.camelize(self) if uppercase_first_letter

        backend.camelize_lower(self) if backend.respond_to?(:camelize_lower)
      end

      # Built-in camelize used when no backend result is available.
      def fallback_camelize(uppercase_first_letter)
        str = to_s.sub(/^[a-z\d]*/) { Regexp.last_match(0).capitalize }
        str = str.tr("-", "_")
        str = str.gsub(/(?:_|(\/))([a-z\d]*)/i) do
          "#{Regexp.last_match(1)}#{Regexp.last_match(2).capitalize}"
        end
        str = str.gsub("/", "::")
        str = str.sub(/^[A-Z]*/) { Regexp.last_match(0).downcase } unless uppercase_first_letter
        str
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps
  # Helpers that build robots.txt content advertising the sitemap URL.
  module RobotsTxt
    class << self
      # Builds the "Sitemap: <url>" line.
      #
      # @param url [#to_s, nil] sitemap URL; when nil or blank, the URL of
      #   SiteMaps.current_adapter's config is used instead
      # @return [String] the directive line, without a trailing newline
      # @raise [ArgumentError] when no URL was given and none could be read
      #   from the current adapter
      def sitemap_directive(url = nil)
        # A blank URL would yield an invalid "Sitemap: " line, so treat it
        # like a missing one and let the adapter fallback / guard apply.
        url = nil if blank?(url)
        url ||= SiteMaps.current_adapter&.config&.url
        raise ArgumentError, "No sitemap URL provided and no adapter configured" if blank?(url)

        "Sitemap: #{url}"
      end

      # Renders a permissive robots.txt body: an allow-all rule, then any
      # extra directives, then the sitemap directive, one per line.
      #
      # @param sitemap_url [#to_s, nil] forwarded to {#sitemap_directive}
      # @param extra_directives [Array<String>, String, nil] additional lines
      #   inserted before the sitemap directive
      # @return [String] newline-terminated robots.txt content
      # @raise [ArgumentError] see {#sitemap_directive}
      def render(sitemap_url: nil, extra_directives: [])
        lines = ["User-agent: *", "Allow: /"]
        # Array() tolerates nil and a single directive passed as a String.
        lines.concat(Array(extra_directives))
        lines << sitemap_directive(sitemap_url)
        "#{lines.join("\n")}\n"
      end

      private

      # True for nil and for values whose string form is empty or whitespace.
      def blank?(value)
        value.to_s.strip.empty?
      end
    end
  end
end
@@ -2,12 +2,12 @@
2
2
 
3
3
  module SiteMaps
4
4
  module Runner::EventListener
5
- extend Primitives::Output
5
+ extend Primitive::Output
6
6
 
7
7
  module_function
8
8
 
9
9
  def [](event_name)
10
- method_name = Primitives::String.new(event_name).underscore.to_sym
10
+ method_name = Primitive::String.new(event_name).underscore.to_sym
11
11
  return unless respond_to?(:"on_#{method_name}")
12
12
 
13
13
  method(:"on_#{method_name}")
@@ -4,8 +4,9 @@ module SiteMaps
4
4
  class Runner
5
5
  attr_reader :adapter
6
6
 
7
- def initialize(adapter = SiteMaps.current_adapter, max_threads: 4)
7
+ def initialize(adapter = SiteMaps.current_adapter, max_threads: 4, ping: nil)
8
8
  @adapter = adapter.tap(&:reset!)
9
+ @ping = ping
9
10
  @pool = Concurrent::FixedThreadPool.new(max_threads)
10
11
  @execution = Concurrent::Hash.new
11
12
  @failed = Concurrent::AtomicBoolean.new(false)
@@ -62,8 +63,9 @@ module SiteMaps
62
63
  builder = SiteMaps::SitemapBuilder.new(
63
64
  adapter: adapter,
64
65
  location: process.location(**kwargs),
65
- notification_payload: { process: process }
66
+ notification_payload: {process: process}
66
67
  )
68
+ adapter.process_mixins.each { |mixin| builder.extend(mixin) }
67
69
  process.call(builder, **kwargs)
68
70
  builder.finalize!
69
71
  end
@@ -88,9 +90,21 @@ module SiteMaps
88
90
  adapter.repo.remaining_index_links.each do |item|
89
91
  adapter.sitemap_index.add(item)
90
92
  end
93
+ adapter.external_sitemaps.each do |item|
94
+ adapter.sitemap_index.add(item)
95
+ end
91
96
  unless adapter.sitemap_index.empty?
92
97
  raw_data = adapter.sitemap_index.to_xml
93
- adapter.write(adapter.config.url, raw_data, last_modified: Time.now)
98
+ adapter.write(adapter.config.url, raw_data, last_modified: adapter.sitemap_index.last_modified)
99
+ end
100
+ should_ping = @ping.nil? ? adapter.config.respond_to?(:ping_search_engines?) && adapter.config.ping_search_engines? : @ping
101
+ ping_search_engines if should_ping
102
+ end
103
+
104
+ def ping_search_engines
105
+ engines = adapter.config.respond_to?(:ping_engines) ? adapter.config.ping_engines : nil
106
+ SiteMaps::Notification.instrument("sitemaps.ping") do |payload|
107
+ payload[:results] = SiteMaps::Ping.ping(adapter.config.url, engines: engines)
94
108
  end
95
109
  end
96
110
 
@@ -6,7 +6,7 @@ module SiteMaps
6
6
 
7
7
  def initialize(adapter:, location: nil, notification_payload: {})
8
8
  @adapter = adapter
9
- @url_set = SiteMaps::Builder::URLSet.new
9
+ @url_set = build_url_set
10
10
  @location = location
11
11
  @mutex = Mutex.new
12
12
  @notification_payload = notification_payload
@@ -15,10 +15,13 @@ module SiteMaps
15
15
  def add(path, params: nil, **options)
16
16
  @mutex.synchronize do
17
17
  link = build_link(path, params)
18
- url_set.add(link, **options)
18
+ filtered_options = adapter.apply_url_filters(link, options)
19
+ return if filtered_options.nil?
20
+
21
+ url_set.add(link, **filtered_options)
19
22
  rescue SiteMaps::FullSitemapError
20
23
  finalize_and_start_next_urlset!
21
- url_set.add(link, **options)
24
+ url_set.add(link, **filtered_options)
22
25
  end
23
26
  end
24
27
 
@@ -66,7 +69,16 @@ module SiteMaps
66
69
  adapter.write(sitemap_url, raw_data, last_modified: url_set.last_modified)
67
70
  add_sitemap_index(sitemap_url, lastmod: url_set.last_modified)
68
71
  end
69
- @url_set = SiteMaps::Builder::URLSet.new
72
+ @url_set = build_url_set
73
+ end
74
+
75
+ def build_url_set
76
+ options = {}
77
+ options[:max_links] = config.max_links if config.respond_to?(:max_links)
78
+ options[:emit_priority] = config.emit_priority if config.respond_to?(:emit_priority)
79
+ options[:emit_changefreq] = config.emit_changefreq if config.respond_to?(:emit_changefreq)
80
+ options[:xsl_url] = config.xsl_stylesheet_url if config.respond_to?(:xsl_stylesheet_url)
81
+ SiteMaps::Builder::URLSet.new(**options)
70
82
  end
71
83
 
72
84
  def build_link(path, params)
@@ -43,6 +43,9 @@ module SiteMaps
43
43
  raise FileNotFoundError.new("The file #{@location} does not exist")
44
44
  rescue OpenURI::HTTPError
45
45
  raise FileNotFoundError.new("The file #{@location} could not be opened")
46
+ rescue SocketError, Errno::ECONNREFUSED, Errno::ETIMEDOUT,
47
+ Net::OpenTimeout, Net::ReadTimeout => e
48
+ raise FileNotFoundError.new("The file #{@location} could not be reached: #{e.message}")
46
49
  end
47
50
 
48
51
  def compressed?
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SiteMaps
4
- VERSION = "0.0.1.beta3"
4
+ VERSION = "0.1.1"
5
5
  end
data/lib/site_maps.rb CHANGED
@@ -3,6 +3,7 @@
3
3
  require_relative "site_maps/version"
4
4
 
5
5
  require "builder"
6
+ require "logger"
6
7
  require "concurrent-ruby"
7
8
  require "date"
8
9
  require "fileutils"
@@ -23,6 +24,7 @@ loader.inflector.inflect "dsl" => "DSL"
23
24
  loader.inflector.inflect "url_set" => "URLSet"
24
25
  loader.inflector.inflect "url" => "URL"
25
26
  loader.inflector.inflect "xml" => "XML"
27
+ loader.inflector.inflect "xsl_stylesheet" => "XSLStylesheet"
26
28
  loader.log! if ENV["DEBUG_ZEITWERK"]
27
29
  loader.setup
28
30
 
@@ -33,7 +35,7 @@ module SiteMaps
33
35
  news: 1_000
34
36
  }
35
37
  MAX_FILESIZE = 50_000_000 # bytes
36
- DEFAULT_LOGGER = Logger.new($stdout)
38
+ DEFAULT_LOGGER = ::Logger.new($stdout)
37
39
 
38
40
  Error = Class.new(StandardError)
39
41
  AdapterNotFound = Class.new(Error)
@@ -42,6 +44,10 @@ module SiteMaps
42
44
  FullSitemapError = Class.new(Error)
43
45
  ConfigurationError = Class.new(Error)
44
46
 
47
+ SCOPE_KEY = :__site_maps_scope__
48
+
49
+ @mutex = Mutex.new
50
+
45
51
  class << self
46
52
  attr_reader :current_adapter
47
53
  attr_writer :logger
@@ -54,18 +60,51 @@ module SiteMaps
54
60
  adapter_class = if adapter.is_a?(Class) # && adapter < Adapters::Adapter
55
61
  adapter
56
62
  else
57
- const_name = Primitives::String.new(adapter.to_s).classify
63
+ const_name = Primitive::String.new(adapter.to_s).classify
58
64
  begin
59
65
  Adapters.const_get(const_name)
60
66
  rescue NameError
61
67
  raise AdapterNotFound, "Adapter #{adapter.inspect} not found"
62
68
  end
63
69
  end
64
- @current_adapter = adapter_class.new(**options, &block)
70
+ instance = adapter_class.new(**options, &block)
71
+ if (scope = Thread.current[SCOPE_KEY])
72
+ scope[:adapter] = instance
73
+ else
74
+ @current_adapter = instance
75
+ end
76
+ instance
77
+ end
78
+
79
+ # Register a context-aware sitemap definition. The block is stored and
80
+ # called when {.generate} is invoked with a `context:` hash. The hash
81
+ # keys are passed as keyword arguments to the block.
82
+ #
83
+ # Example:
84
+ # # config/sitemap.rb
85
+ # SiteMaps.define do |site:|
86
+ # use(:file_system) do
87
+ # config.url = "https://#{site.domain}/sitemap.xml"
88
+ # process { |s| site.pages.each { |p| s.add(p.path) } }
89
+ # end
90
+ # end
91
+ #
92
+ # # Usage:
93
+ # SiteMaps.generate(config_file: "config/sitemap.rb", context: {site: site})
94
+ # .enqueue_all
95
+ # .run
96
+ #
97
+ # @param block [Proc] Receives keyword arguments from the `context:` hash
98
+ def define(&block)
99
+ if (scope = Thread.current[SCOPE_KEY])
100
+ scope[:definition] = block
101
+ else
102
+ @definition = block
103
+ end
65
104
  end
66
105
 
67
106
  def config
68
- @config ||= Configuration.new
107
+ @mutex.synchronize { @config ||= Configuration.new }
69
108
  yield(@config) if block_given?
70
109
  @config
71
110
  end
@@ -88,21 +127,53 @@ module SiteMaps
88
127
  # .enqueue_remaining # Enqueue all other non-enqueued processes
89
128
  # .run
90
129
  #
130
+ # For multi-tenant / context-aware configurations, the config file can
131
+ # use {.define} and pass runtime context as keyword arguments via the
132
+ # `context:` kwarg:
133
+ #
134
+ # Example:
135
+ # SiteMaps.generate(config_file: "config/sitemap.rb", context: {site: site})
136
+ # .enqueue_all
137
+ # .run
138
+ #
91
139
  # @param config_file [String] The path to a configuration file
140
+ # @param context [Hash] Keyword arguments passed to the block registered
141
+ # via {.define}. Must be a Hash (or nil for no context).
92
142
  # @param options [Hash] Options to pass to the runner
93
143
  # @return [Runner] An instance of the runner
94
- def generate(config_file: nil, **options)
144
+ def generate(config_file: nil, context: nil, **options)
145
+ adapter = nil
95
146
  if config_file
96
- @current_adapter = nil
97
- load(config_file)
147
+ previous_scope = Thread.current[SCOPE_KEY]
148
+ scope = {adapter: nil, definition: nil}
149
+ Thread.current[SCOPE_KEY] = scope
150
+ begin
151
+ load(config_file)
152
+ if scope[:definition]
153
+ kwargs = context || {}
154
+ raise ArgumentError, "context: must be a Hash, got #{context.class}" unless kwargs.is_a?(Hash)
155
+
156
+ instance_exec(**kwargs, &scope[:definition])
157
+ end
158
+ adapter = scope[:adapter]
159
+ ensure
160
+ Thread.current[SCOPE_KEY] = previous_scope
161
+ end
162
+ # Preserve backward-compat: expose the generated adapter through
163
+ # the `current_adapter` singleton for single-tenant callers. In
164
+ # multi-tenant concurrent use, last-writer-wins — each Runner still
165
+ # gets its own isolated adapter from the thread-local scope above.
166
+ @current_adapter = adapter if adapter
167
+ else
168
+ adapter = current_adapter
98
169
  end
99
- raise AdapterNotSetError, "No adapter set. Use SiteMaps.use to set an adapter" unless current_adapter
170
+ raise AdapterNotSetError, "No adapter set. Use SiteMaps.use to set an adapter" unless adapter
100
171
 
101
- Runner.new(current_adapter, **options)
172
+ Runner.new(adapter, **options)
102
173
  end
103
174
 
104
175
  def logger
105
- @logger ||= DEFAULT_LOGGER
176
+ @mutex.synchronize { @logger ||= DEFAULT_LOGGER }
106
177
  end
107
178
  end
108
179
  end
data/site_maps.gemspec CHANGED
@@ -17,7 +17,7 @@ Gem::Specification.new do |spec|
17
17
 
18
18
  spec.homepage = "https://github.com/marcosgz/site_maps"
19
19
  spec.license = "MIT"
20
- spec.required_ruby_version = Gem::Requirement.new(">= 2.7.0")
20
+ spec.required_ruby_version = Gem::Requirement.new(">= 3.2.0")
21
21
 
22
22
  raise "RubyGems 2.0 or newer is required to protect against public gem pushes." unless spec.respond_to?(:metadata)
23
23
 
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: site_maps
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1.beta3
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Marcos G. Zimmermann
8
- autorequire:
9
8
  bindir: exec
10
9
  cert_chain: []
11
- date: 2024-11-23 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: rack
@@ -96,7 +95,9 @@ files:
96
95
  - ".rspec"
97
96
  - ".rubocop.yml"
98
97
  - ".tool-versions"
98
+ - AGENTS.md
99
99
  - CHANGELOG.md
100
+ - CLAUDE.md
100
101
  - Gemfile
101
102
  - Gemfile.lock
102
103
  - LICENSE.txt
@@ -104,6 +105,16 @@ files:
104
105
  - Rakefile
105
106
  - bin/console
106
107
  - bin/setup
108
+ - docs/README.md
109
+ - docs/adapters.md
110
+ - docs/api.md
111
+ - docs/cli.md
112
+ - docs/events.md
113
+ - docs/extensions.md
114
+ - docs/getting-started.md
115
+ - docs/middleware.md
116
+ - docs/processes.md
117
+ - docs/rails.md
107
118
  - exec/site_maps
108
119
  - lib/site-maps.rb
109
120
  - lib/site_maps.rb
@@ -124,18 +135,22 @@ files:
124
135
  - lib/site_maps/builder/sitemap_index/item.rb
125
136
  - lib/site_maps/builder/url.rb
126
137
  - lib/site_maps/builder/url_set.rb
138
+ - lib/site_maps/builder/xsl_stylesheet.rb
127
139
  - lib/site_maps/cli.rb
128
140
  - lib/site_maps/configuration.rb
129
141
  - lib/site_maps/incremental_location.rb
142
+ - lib/site_maps/middleware.rb
130
143
  - lib/site_maps/notification.rb
131
144
  - lib/site_maps/notification/bus.rb
132
145
  - lib/site_maps/notification/event.rb
133
146
  - lib/site_maps/notification/publisher.rb
134
- - lib/site_maps/primitives/array.rb
135
- - lib/site_maps/primitives/output.rb
136
- - lib/site_maps/primitives/string.rb
147
+ - lib/site_maps/ping.rb
148
+ - lib/site_maps/primitive/array.rb
149
+ - lib/site_maps/primitive/output.rb
150
+ - lib/site_maps/primitive/string.rb
137
151
  - lib/site_maps/process.rb
138
152
  - lib/site_maps/railtie.rb
153
+ - lib/site_maps/robots_txt.rb
139
154
  - lib/site_maps/runner.rb
140
155
  - lib/site_maps/runner/event_listener.rb
141
156
  - lib/site_maps/sitemap_builder.rb
@@ -150,7 +165,6 @@ metadata:
150
165
  bug_tracker_uri: https://github.com/marcosgz/site_maps/issues
151
166
  documentation_uri: https://github.com/marcosgz/site_maps
152
167
  source_code_uri: https://github.com/marcosgz/site_maps
153
- post_install_message:
154
168
  rdoc_options: []
155
169
  require_paths:
156
170
  - lib
@@ -158,15 +172,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
158
172
  requirements:
159
173
  - - ">="
160
174
  - !ruby/object:Gem::Version
161
- version: 2.7.0
175
+ version: 3.2.0
162
176
  required_rubygems_version: !ruby/object:Gem::Requirement
163
177
  requirements:
164
178
  - - ">="
165
179
  - !ruby/object:Gem::Version
166
180
  version: '0'
167
181
  requirements: []
168
- rubygems_version: 3.5.21
169
- signing_key:
182
+ rubygems_version: 3.6.9
170
183
  specification_version: 4
171
184
  summary: Concurrent and Incremental sitemap.xml builder for ruby applications
172
185
  test_files: []
@@ -1,43 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- begin
4
- require "dry/inflector"
5
- rescue LoadError
6
- # noop
7
- end
8
-
9
- begin
10
- require "active_support/inflector"
11
- rescue LoadError
12
- # noop
13
- end
14
-
15
- module SiteMaps::Primitives
16
- class String < ::String
17
- def classify
18
- new_str = if defined?(Dry::Inflector)
19
- Dry::Inflector.new.classify(self)
20
- elsif defined?(ActiveSupport::Inflector)
21
- ActiveSupport::Inflector.classify(self)
22
- else
23
- split("_").map(&:capitalize).join
24
- end
25
-
26
- self.class.new(new_str)
27
- end
28
-
29
- def underscore
30
- new_str = sub(/^::/, "")
31
- .gsub("::", "/")
32
- .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
33
- .gsub(/([a-z\d])([A-Z])/, '\1_\2')
34
- .tr("-", "_")
35
- .tr(".", "_")
36
- .gsub(/\s/, "_")
37
- .gsub(/__+/, "_")
38
- .downcase
39
-
40
- self.class.new(new_str)
41
- end
42
- end
43
- end