site_maps 0.0.1.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/main.yml +45 -0
  3. data/.gitignore +16 -0
  4. data/.rspec +1 -0
  5. data/.rubocop.yml +36 -0
  6. data/.tool-versions +1 -0
  7. data/CHANGELOG.md +8 -0
  8. data/Gemfile +18 -0
  9. data/Gemfile.lock +134 -0
  10. data/LICENSE.txt +21 -0
  11. data/README.md +186 -0
  12. data/Rakefile +4 -0
  13. data/bin/console +14 -0
  14. data/bin/setup +7 -0
  15. data/exec/site_maps +9 -0
  16. data/lib/site-maps.rb +3 -0
  17. data/lib/site_maps/adapters/adapter.rb +80 -0
  18. data/lib/site_maps/adapters/aws_sdk/config.rb +51 -0
  19. data/lib/site_maps/adapters/aws_sdk/location.rb +9 -0
  20. data/lib/site_maps/adapters/aws_sdk/storage.rb +52 -0
  21. data/lib/site_maps/adapters/aws_sdk.rb +31 -0
  22. data/lib/site_maps/adapters/file_system/config.rb +5 -0
  23. data/lib/site_maps/adapters/file_system/location.rb +35 -0
  24. data/lib/site_maps/adapters/file_system/storage.rb +61 -0
  25. data/lib/site_maps/adapters/file_system.rb +26 -0
  26. data/lib/site_maps/adapters/noop.rb +18 -0
  27. data/lib/site_maps/atomic_repository.rb +24 -0
  28. data/lib/site_maps/builder/link.rb +27 -0
  29. data/lib/site_maps/builder/normalizer.rb +48 -0
  30. data/lib/site_maps/builder/sitemap_index/item.rb +35 -0
  31. data/lib/site_maps/builder/sitemap_index.rb +40 -0
  32. data/lib/site_maps/builder/url.rb +152 -0
  33. data/lib/site_maps/builder/url_set.rb +92 -0
  34. data/lib/site_maps/cli.rb +68 -0
  35. data/lib/site_maps/configuration.rb +119 -0
  36. data/lib/site_maps/incremental_location.rb +62 -0
  37. data/lib/site_maps/notification/bus.rb +90 -0
  38. data/lib/site_maps/notification/event.rb +50 -0
  39. data/lib/site_maps/notification/publisher.rb +78 -0
  40. data/lib/site_maps/notification.rb +36 -0
  41. data/lib/site_maps/primitives/array.rb +15 -0
  42. data/lib/site_maps/primitives/output.rb +66 -0
  43. data/lib/site_maps/primitives/string.rb +43 -0
  44. data/lib/site_maps/process.rb +29 -0
  45. data/lib/site_maps/railtie.rb +18 -0
  46. data/lib/site_maps/runner/event_listener.rb +78 -0
  47. data/lib/site_maps/runner.rb +136 -0
  48. data/lib/site_maps/sitemap_builder.rb +75 -0
  49. data/lib/site_maps/sitemap_reader.rb +56 -0
  50. data/lib/site_maps/version.rb +5 -0
  51. data/lib/site_maps.rb +112 -0
  52. data/site_maps.gemspec +44 -0
  53. metadata +172 -0
@@ -0,0 +1,68 @@
1
+ require "thor"
2
+
3
module SiteMaps
  # Thor-based command line interface for the gem.
  #
  # Exposes two commands:
  # * `generate` - builds sitemap files for the requested processes
  # * `version`  - prints the gem version
  #
  # Running the executable without a command prints the help.
  class CLI < Thor
    method_option :debug, type: :boolean, default: false
    method_option :logfile, type: :string, default: nil
    method_option :pidfile, type: :string, default: nil
    method_option :config_file, type: :string, aliases: "-r", default: nil
    method_option :max_threads, type: :numeric, aliases: "-c", default: 4
    method_option :context, type: :hash, default: {}
    method_option :enqueue_remaining, type: :boolean, default: false

    desc "generate 1st_process,2nd_process ... ,Nth_process", "Generate sitemap.xml files for the given processes"
    # NOTE: a former `default_command :start` declaration was removed from
    # here. No `start` command exists and the setting was overwritten by the
    # `default_task :help` declaration below, so it was dead configuration.

    # Generate the sitemaps.
    #
    # @param processes [String] comma separated process names; when empty,
    #   every process is enqueued
    # @return [Object] whatever `runner.run` returns
    def generate(processes = "")
      load_rails if rails_app?

      opts = (@options || {}).transform_keys(&:to_sym)
      if (logfile = opts[:logfile])
        SiteMaps.logger = Logger.new(logfile)
      end
      SiteMaps.logger.level = Logger::DEBUG if opts[:debug]

      SiteMaps::Notification.subscribe(SiteMaps::Runner::EventListener)

      runner = SiteMaps.generate(
        config_file: opts[:config_file],
        max_threads: opts[:max_threads]
      )
      if processes.empty?
        runner.enqueue_all
      else
        # The --context hash is forwarded as keyword arguments to each process.
        kwargs = (opts[:context] || {}).transform_keys(&:to_sym)
        processes.split(",").each do |process|
          runner.enqueue(process.strip.to_sym, **kwargs)
        end
      end
      runner.enqueue_remaining if opts[:enqueue_remaining]

      runner.run
    end

    desc "version", "Print the version"
    def version
      puts "SiteMaps version: #{SiteMaps::VERSION}"
    end

    default_task :help

    private

    # @return [Boolean] true when the working directory contains
    #   config/application.rb
    def rails_app?
      File.exist?(File.join(Dir.pwd, "config", "application.rb"))
    end

    # Loads config/application.rb from the working directory, then the gem's
    # railtie, then requires the Rails environment.
    def load_rails
      require File.expand_path(File.join(Dir.pwd, "config", "application.rb"))
      require_relative "railtie"

      ::Rails.application.require_environment!
    end
  end
end
@@ -0,0 +1,119 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps
  # Holds the sitemap settings (`url`, `directory`) and provides a small DSL
  # (`attribute`) that subclasses can use to declare additional options.
  class Configuration
    class << self
      # @return [Hash{Symbol=>Object}] declared attribute names mapped to
      #   their default (a value or a callable)
      def attributes
        @attributes || {}
      end

      # Declare a configuration attribute. Defines a reader, a writer and a
      # `name?` predicate, unless a method with that name already exists.
      #
      # @param name [Symbol] attribute name
      # @param default [Object, #call] default value, or a callable that is
      #   invoked to compute it when an instance is built
      def attribute(name, default: nil)
        @attributes ||= {}
        @attributes[name] = default

        ivar = :"@#{name}"
        writer = :"#{name}="
        predicate = :"#{name}?"

        define_method(name) { instance_variable_get(ivar) } unless method_defined?(name)
        define_method(writer) { |value| instance_variable_set(ivar, value) } unless method_defined?(writer)
        define_method(predicate) { !!send(name) } unless method_defined?(predicate)
      end

      # Give each subclass its own copy of the attributes declared so far.
      def inherited(subclass)
        super
        subclass.instance_variable_set(:@attributes, attributes.dup)
      end
    end

    attribute :url
    attribute :directory, default: "/tmp/sitemaps"

    # @param options [Hash] attribute values overriding the defaults
    # @raise [ConfigurationError] when an option has no matching writer
    def initialize(**options)
      default_attributes.merge(options).each do |key, value|
        setter = :"#{key}="
        # Check for the writer up front instead of rescuing NoMethodError:
        # a NoMethodError raised *inside* a writer is a genuine bug and must
        # not be reported as an unknown option.
        unless respond_to?(setter, true)
          raise ConfigurationError, <<~ERROR
            Unknown configuration option: #{key}
          ERROR
        end

        send(setter, value)
      end
    end

    # Build an instance of another configuration class from the current
    # values.
    #
    # @param klass [Class] class to instantiate
    # @param options [Hash] extra options; they win over the current values
    def becomes(klass, **options)
      klass.new(**to_h, **options)
    end

    # @return [Hash{Symbol=>Object}] every instance variable, keyed by its
    #   name without the leading "@"
    def to_h
      instance_variables.each_with_object({}) do |var, hash|
        hash[var.to_s.delete_prefix("@").to_sym] = instance_variable_get(var)
      end
    end

    # @return [String] the configured sitemap URL
    # @raise [ConfigurationError] when no URL has been set
    def url
      @url || validate_url!
    end

    # @return [URI] the sitemap URL without path, query and fragment
    def base_uri
      ::URI.parse(url).tap do |uri|
        uri.path = ""
        uri.query = nil
        uri.fragment = nil
      end
    end

    # @return [Pathname] `directory` joined with the file name of the URL
    def local_sitemap_path
      filename = ::File.basename(url)
      Pathname.new(directory).join(filename)
    end

    # Read the `<sitemap>` entries of the sitemap index, from the local file
    # when it exists and from the URL otherwise.
    #
    # @return [Array<SiteMaps::Builder::SitemapIndex::Item>] empty when the
    #   reader raises SiteMaps::SitemapReader::Error
    def fetch_sitemap_index_links
      source = local_sitemap_path.exist? ? local_sitemap_path : url
      doc = SiteMaps::SitemapReader.new(source).to_doc

      doc.css("sitemapindex sitemap").map do |node|
        SiteMaps::Builder::SitemapIndex::Item.new(
          node.at_css("loc").text,
          node.at_css("lastmod")&.text
        )
      end
    rescue SiteMaps::SitemapReader::Error
      []
    end

    # @return [String] directory part of the URL path, without the leading
    #   slash and without the file name
    def remote_sitemap_directory
      path = ::URI.parse(url).path.delete_prefix("/")
      path.split("/")[0..-2].join("/")
    end

    private

    def validate_url!
      return if @url

      raise ConfigurationError, <<~ERROR
        You must set a sitemap URL in your configuration to use the add method.

        Example:
          SiteMaps.configure do |config|
            config.url = "https://example.com/sitemap.xml"
          end
      ERROR
    end

    # Resolve the declared defaults (calling callables) and drop nil values.
    def default_attributes
      self.class.attributes.each_with_object({}) do |(key, default), hash|
        value = default.respond_to?(:call) ? default.call : default
        hash[key] = value unless value.nil?
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps
  # Produces numbered sitemap URLs (sitemap0.xml, sitemap1.xml, ...) for a
  # process, derived from the main sitemap URL and an optional
  # process-specific location.
  class IncrementalLocation
    FILENAME = "sitemap.xml"
    PLACEHOLDER = "%{index}"

    # @param main_url [String] URL of the main sitemap
    # @param process_location [String, nil] absolute URL, absolute path or
    #   path relative to the main sitemap directory; the main URL is used
    #   when nil
    def initialize(main_url, process_location)
      @main_uri = URI(main_url)
      @index = Concurrent::AtomicFixnum.new(0)
      normalize(process_location || @main_uri.to_s)
    end

    # @return [String] URL for the current index
    def url
      # Literal substitution rather than String#%: a URL may contain
      # percent-encoded characters (e.g. "%20") which are not valid format
      # directives.
      placeholder_url.gsub(PLACEHOLDER, @index.value.to_s)
    end

    # Advance to the next index.
    #
    # @return [self]
    def next
      @index.increment
      self
    end

    # @return [String] the main sitemap URL
    def main_url
      main_uri.to_s
    end

    # @return [String] directory of the location path without the leading
    #   slash
    def relative_directory
      File.dirname(@uri.path).delete_prefix("/")
    end

    private

    attr_reader :main_uri, :placeholder_url

    def base_url
      main_uri.dup.tap { |uri| uri.path = "" }
    end

    def base_dir
      File.dirname(main_uri.path)
    end

    # Resolve +loc+ against the main URL and store both the concrete URI and
    # the URL template holding the index placeholder.
    def normalize(loc)
      uri =
        if %r{\Ahttps?://}.match?(loc)
          URI(loc)
        elsif loc.start_with?("/")
          main_uri.dup.tap { |copy| copy.path = loc }
        else
          main_uri.dup.tap { |copy| copy.path = File.join(base_dir, loc) }
        end

      # File.extname("sitemap.xml.gz") is ".gz", so the suffix is matched on
      # the whole path in order to recognise gzipped sitemaps as files.
      uri.path = File.join(uri.path, FILENAME) unless uri.path.end_with?(".xml", ".xml.gz")

      base = uri.dup.tap { |copy| copy.path = "" }.to_s
      directory = File.dirname(uri.path)
      basename = File.basename(uri.path)
      index_basename = basename.sub(/\.(xml|xml\.gz)\z/, "#{PLACEHOLDER}.\\1")

      @placeholder_url = File.join(base, File.join(directory, index_basename))
      @uri = URI(File.join(base, File.join(directory, basename)))
    end
  end
end
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps
  module Notification
    # In-process event bus: keeps the registered events and, per event id,
    # the callables to invoke when that event is published.
    class Bus
      attr_reader :listeners, :events

      def initialize
        @listeners = Concurrent::Hash.new { |h, k| h[k] = Concurrent::Array.new }
        @events = Concurrent::Hash.new
      end

      # Call every listener of +event_id+ with the event carrying +payload+.
      # Listeners are invoked synchronously, in subscription order.
      #
      # @param event_id [String, Symbol]
      # @param payload [Hash]
      # @raise [UnregisteredEventError] when the event is not registered
      # @return [self]
      def publish(event_id, payload)
        raise UnregisteredEventError, event_id unless can_handle?(event_id)

        process(event_id, payload) do |event, listener|
          listener.call(event)
        end
        self
      end

      # Subscribe a listener object to every registered event for which it
      # responds to the event's listener method.
      #
      # @param listener [Object]
      # @return [self]
      def attach(listener)
        events.each do |id, event|
          method_name = event.listener_method
          next unless listener.respond_to?(method_name)

          listeners[id] << listener.method(method_name)
        end
        self
      end

      # Remove the callbacks bound to +listener+, or +listener+ itself when it
      # is a callable that was subscribed directly.
      #
      # @param listener [Object]
      # @return [self]
      def unsubscribe(listener)
        listeners.each_value do |callbacks|
          # Block subscribers are Procs and have no #receiver, so guard the
          # call; delete_if avoids removing elements while iterating with each.
          callbacks.delete_if do |callback|
            callback == listener ||
              (callback.respond_to?(:receiver) && callback.receiver == listener)
          end
        end
        self
      end
      alias_method :detach, :unsubscribe

      # Subscribe a block to an event id, or attach a listener object.
      #
      # @param object_or_event_id [String, Symbol, Object]
      # @raise [InvalidSubscriberError] when the id is not registered or the
      #   object responds to no listener method
      # @return [self]
      def subscribe(object_or_event_id, &block)
        raise(InvalidSubscriberError, object_or_event_id) unless can_handle?(object_or_event_id)

        if block
          listeners[object_or_event_id] << block
        else
          attach(object_or_event_id)
        end

        self
      end

      # @param listener [Object] a callable or a listener object
      # @return [Boolean] whether the callable itself, or any listener method
      #   of the object, is currently subscribed
      # rubocop:disable Performance/RedundantEqualityComparisonBlock
      def subscribed?(listener)
        return true if listeners.values.any? { |callbacks| callbacks.any? { |callback| callback == listener } }

        bound = events.values.map(&:listener_method)
          .select { |method_name| listener.respond_to?(method_name) }
          .map { |method_name| listener.method(method_name) }
        listeners.values.any? { |callbacks| (bound & callbacks).size > 0 }
      end
      # rubocop:enable Performance/RedundantEqualityComparisonBlock

      # @param object_or_event_id [String, Symbol, Object]
      # @return [Boolean] for ids, whether the event is registered; for other
      #   objects, whether they respond to at least one listener method
      def can_handle?(object_or_event_id)
        case object_or_event_id
        when String, Symbol
          events.key?(object_or_event_id)
        else
          events
            .values
            .map(&:listener_method)
            .any? { |method_name| object_or_event_id.respond_to?(method_name) }
        end
      end

      protected

      # Yield an event built from the registered event and +payload+ for each
      # listener of +event_id+.
      def process(event_id, payload)
        listeners[event_id].each do |listener|
          event = events[event_id].payload(payload)

          yield(event, listener)
        end
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps
  module Notification
    # An identified event together with its payload hash. Hash-style readers
    # (`[]`, `fetch`, `to_h`, `key?`) are forwarded to the payload.
    class Event
      extend Forwardable

      attr_reader :id

      def_delegators :@payload, :[], :fetch, :to_h, :key?
      alias_method :to_hash, :to_h

      # Build an event.
      #
      # @param [Symbol, String] id The event identifier
      # @param [Hash] payload
      #
      # @return [Event]
      #
      # @api private
      def initialize(id, payload = {})
        @id = id
        @payload = payload
      end

      # Read the payload, or derive a new event with extra payload data.
      #
      # @overload
      #   @return [Hash] the current payload
      #
      # @overload payload(data)
      #   @param [Hash] data Entries merged over the current payload
      #   @return [Event] A new event with the same id and the merged payload
      #
      # @api public
      def payload(data = nil)
        return @payload unless data

        merged = @payload.merge(data)
        self.class.new(id, merged)
      end

      # Name of the method a listener object implements to receive this
      # event: "on_" followed by the id, underscored. Memoized.
      #
      # @api private
      def listener_method
        @listener_method ||= begin
          raw_name = Primitives::String.new("on_#{id}")
          raw_name.underscore.to_sym
        end
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps
  module Notification
    # Mixin giving the including class a class-level event bus.
    module Publisher
      def self.included(klass)
        klass.extend(ClassMethods)
      end

      # Class-level API added to publishers.
      #
      # @api public
      module ClassMethods
        extend Forwardable
        def_delegators :bus, :publish, :subscribed?, :unsubscribe

        # Register an event type on the bus.
        #
        # @param [Symbol,String] event_id The event identifier
        # @param [Hash] payload Optional default payload
        #
        # @return [self]
        #
        # @api public
        def register_event(event_id, payload = {})
          bus.events[event_id] = Event.new(event_id, payload)
          self
        end

        # Run the given block and, when it returns without raising, publish
        # the event. The block receives the payload, which carries the start
        # time under :__started_at__ while the block runs; on publication
        # that entry is replaced by the elapsed time under :runtime (unless
        # :runtime is already set).
        #
        # @param [String] event_id The event identifier
        # @param [Hash] payload An optional payload
        # @raise [UnregisteredEventError] if the event is not registered
        # @return [Object] the value returned by the block
        #
        # @api public
        def instrument(event_id, payload = {})
          completed = false # flipped only once the block has returned
          raise(UnregisteredEventError, event_id) unless bus.can_handle?(event_id)

          payload[:__started_at__] = Time.now
          outcome = yield(payload)
          completed = true
          outcome
        ensure
          if completed
            if payload[:__started_at__]
              payload[:runtime] ||= Time.now - payload.delete(:__started_at__)
            end
            bus.publish(event_id, payload)
          end
        end

        # Subscribe a block to an event id, or attach a listener object.
        #
        # @param [Symbol,String,Object] object_or_event_id The event identifier or a listener object
        # @yield [event] called for each published event when an id is given
        #
        # @raise [SiteMaps::Notification::InvalidSubscriberError] if the bus cannot handle the argument
        # @return [Object] self
        #
        # @api public
        def subscribe(object_or_event_id, &block)
          raise InvalidSubscriberError, object_or_event_id unless bus.can_handle?(object_or_event_id)

          if block
            bus.subscribe(object_or_event_id, &block)
          else
            bus.attach(object_or_event_id)
          end

          self
        end

        # @return [Bus] the lazily created bus of this publisher
        def bus
          @bus ||= Bus.new
        end
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps
  # Notification entry point: error classes, the publisher interface and the
  # list of events emitted by the gem.
  module Notification
    Error = Class.new(SiteMaps::Error)

    # Raised when publishing an event that was never registered.
    class UnregisteredEventError < Error
      def initialize(object_or_event_id)
        # Only identifiers are interpolated into the message.
        if object_or_event_id.is_a?(String) || object_or_event_id.is_a?(Symbol)
          super("You are trying to publish an unregistered event: `#{object_or_event_id}`")
        else
          super("You are trying to publish an unregistered event")
        end
      end
    end

    # Raised when subscribing to an unregistered event id, or with an object
    # that the bus cannot handle.
    class InvalidSubscriberError < Error
      def initialize(object_or_event_id)
        if object_or_event_id.is_a?(String) || object_or_event_id.is_a?(Symbol)
          super("you are trying to subscribe to an event: `#{object_or_event_id}` that has not been registered")
        else
          super("you try use subscriber object that will never be executed")
        end
      end
    end

    include Publisher

    # Events published by the builder and the runner.
    [
      "sitemaps.builder.finalize_urlset",
      "sitemaps.runner.before_process_execution",
      "sitemaps.runner.enqueue_process",
      "sitemaps.runner.process_execution"
    ].each { |event_id| register_event(event_id) }
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps
  module Primitives
    # Array subclass carrying collection helpers.
    class Array < ::Array
      # Coerce any object into an array.
      #
      # * nil becomes an empty array
      # * objects responding to #to_ary are converted with it (falling back
      #   to a one-element array when #to_ary returns nil)
      # * anything else is wrapped in a one-element array
      #
      # @param object [Object]
      # @return [::Array]
      def self.wrap(object)
        return [] if object.nil?
        return [object] unless object.respond_to?(:to_ary)

        object.to_ary || [object]
      end
    end
  end
end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ begin
4
+ require "rainbow"
5
+ rescue LoadError
6
+ end
7
+
8
module SiteMaps
  module Primitives
    # Helpers for writing messages to the console. Text is colorized only
    # when the optional Rainbow constant is defined.
    module Output
      module_function

      # @param number [Numeric]
      # @return [String] the number formatted as "%.3f ms", colorized
      def formatted_runtime(number)
        colorize(sprintf("%.3f ms", number), :lightgray)
      end

      # @param number [Numeric]
      # @param extra [Integer] additional spaces
      # @return [String] spaces as wide as the formatted runtime plus +extra+
      def runtime_padding(number, extra = 2)
        " " * (extra + sprintf("%.3f ms", number).size)
      end

      # Apply each attribute, in order, to the Rainbow-wrapped text.
      #
      # @return [Object] the text unchanged when Rainbow is not defined
      def colorize(text, *attributes)
        if defined? Rainbow
          attributes.reduce(Rainbow(text)) { |p, a| p.public_send(a) }
        else
          text
        end
      end

      # Print a message or an exception to stderr.
      #
      # @param message_or_error [String, Exception]
      # @param backtrace [Boolean, Integer] true prints the whole backtrace,
      #   an Integer is used as the last frame index to print
      # @param options [Hash] forwarded to {print_message}
      def print_error(message_or_error, backtrace: false, **options)
        options[:level] ||= :error
        message = message_or_error.to_s

        print_message(message, output: :stderr, **options)

        if message_or_error.is_a?(Exception) && backtrace
          limit = backtrace.is_a?(Integer) ? backtrace : -1
          print_backtrace(message_or_error, limit: limit, level: options[:level])
        end
      end

      # Print the backtrace frames of +error+ (up to index +limit+) through
      # {print_error}.
      def print_backtrace(error, limit: -1, **options)
        return unless error.respond_to?(:backtrace)
        return if error.backtrace.nil?

        error.backtrace[0..limit].each { |frame| print_error(frame, **options) }
      end

      # Write a message to the selected output.
      #
      # @param message [String] a format string interpolated with +fields+
      # @param level [Symbol] accepted for interface compatibility; not used
      #   by this method
      # @param output [Symbol, String, IO, StringIO] :stdout, :stderr (or the
      #   same names as strings) or a stream object
      # @param newline [Boolean] use #puts when true, #print otherwise
      # @param fields [Hash] named references for the format string
      # @raise [ArgumentError] when +output+ is not supported
      def print_message(message, level: :info, output: $stdout, newline: true, **fields)
        stream =
          case output
          when :stdout, "stdout"
            $stdout
          when :stderr, "stderr"
            $stderr
          when IO
            output
          else
            # StringIO is only referenced when loaded, so an unsupported
            # output raises ArgumentError instead of NameError.
            unless defined?(::StringIO) && output.is_a?(::StringIO)
              raise ArgumentError, "Invalid output #{output.inspect}"
            end

            output
          end

        text =
          begin
            format(message, **fields)
          rescue ArgumentError, KeyError
            # Without fields the message is plain text; a literal "%" (e.g. in
            # an exception message or backtrace frame) must not abort printing.
            raise unless fields.empty?

            message
          end
        newline ? stream.puts(text) : stream.print(text)
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ begin
4
+ require "dry/inflector"
5
+ rescue LoadError
6
+ # noop
7
+ end
8
+
9
+ begin
10
+ require "active_support/inflector"
11
+ rescue LoadError
12
+ # noop
13
+ end
14
+
15
module SiteMaps
  module Primitives
    # String subclass with the inflection helpers used by the gem. Results
    # are returned as instances of the receiver's class.
    class String < ::String
      # Convert to a class-like name. Uses Dry::Inflector when defined, then
      # ActiveSupport::Inflector, and otherwise capitalizes each
      # underscore-separated part.
      #
      # @return [String]
      def classify
        converted =
          if defined?(Dry::Inflector)
            Dry::Inflector.new.classify(self)
          elsif defined?(ActiveSupport::Inflector)
            ActiveSupport::Inflector.classify(self)
          else
            split("_").map(&:capitalize).join
          end

        self.class.new(converted)
      end

      # Convert to a lower-case, underscore-separated form: "::" becomes
      # "/", camel-case boundaries, dashes, dots and whitespace become "_",
      # and runs of underscores are collapsed.
      #
      # @return [String]
      def underscore
        namespaced = sub(/^::/, "").gsub("::", "/")
        split_words = namespaced
          .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
          .gsub(/([a-z\d])([A-Z])/, '\1_\2')
        separated = split_words.tr("-", "_").tr(".", "_").gsub(/\s/, "_")
        collapsed = separated.gsub(/__+/, "_")

        self.class.new(collapsed.downcase)
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps
  # Immutable description of a sitemap process: its name, an optional
  # location template, the default keyword arguments and the block to run.
  Process = Concurrent::ImmutableStruct.new(:name, :location_template, :kwargs_template, :block) do
    # Interpolate the location template with the merged keyword arguments.
    #
    # @return [String, nil] nil when the process has no location template
    def location(**kwargs)
      location_template % keyword_arguments(kwargs) if location_template
    end

    # Invoke the process block with the builder and the merged keyword
    # arguments.
    #
    # @return [Object, nil] the block result, or nil when there is no block
    def call(builder, **kwargs)
      block&.call(builder, **keyword_arguments(kwargs))
    end

    # @return [Boolean] true when the process is not dynamic
    def static?
      !dynamic?
    end

    # @return [Boolean] true when the kwargs template is a non-empty Hash
    def dynamic?
      return false unless kwargs_template.is_a?(Hash)

      !kwargs_template.empty?
    end

    # @param given [Hash, nil] values overriding the template defaults
    # @return [Hash] the template merged with +given+
    def keyword_arguments(given)
      defaults = kwargs_template || {}
      overrides = given || {}
      defaults.merge(overrides)
    end
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "singleton"
4
+ Kernel.require "rails/railtie"
5
+
6
module SiteMaps
  # Rails integration: adds a `route` method to adapters that returns an
  # object including the application's URL helpers.
  class Railtie < ::Rails::Railtie
    initializer "site_maps.named_routes" do
      # Singleton class mixing in the application's route helpers.
      routes_proxy = Class.new do
        include Singleton
        include ::Rails.application.routes.url_helpers
      end

      # Prepended module exposing the singleton instance as `route`.
      route_accessor = Module.new do
        define_method(:route) { routes_proxy.instance }
      end

      SiteMaps::Adapters::Adapter.prepend(route_accessor)
    end
  end
end