site_maps 0.0.1.beta1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/main.yml +45 -0
  3. data/.gitignore +16 -0
  4. data/.rspec +1 -0
  5. data/.rubocop.yml +36 -0
  6. data/.tool-versions +1 -0
  7. data/CHANGELOG.md +8 -0
  8. data/Gemfile +18 -0
  9. data/Gemfile.lock +134 -0
  10. data/LICENSE.txt +21 -0
  11. data/README.md +186 -0
  12. data/Rakefile +4 -0
  13. data/bin/console +14 -0
  14. data/bin/setup +7 -0
  15. data/exec/site_maps +9 -0
  16. data/lib/site-maps.rb +3 -0
  17. data/lib/site_maps/adapters/adapter.rb +80 -0
  18. data/lib/site_maps/adapters/aws_sdk/config.rb +51 -0
  19. data/lib/site_maps/adapters/aws_sdk/location.rb +9 -0
  20. data/lib/site_maps/adapters/aws_sdk/storage.rb +52 -0
  21. data/lib/site_maps/adapters/aws_sdk.rb +31 -0
  22. data/lib/site_maps/adapters/file_system/config.rb +5 -0
  23. data/lib/site_maps/adapters/file_system/location.rb +35 -0
  24. data/lib/site_maps/adapters/file_system/storage.rb +61 -0
  25. data/lib/site_maps/adapters/file_system.rb +26 -0
  26. data/lib/site_maps/adapters/noop.rb +18 -0
  27. data/lib/site_maps/atomic_repository.rb +24 -0
  28. data/lib/site_maps/builder/link.rb +27 -0
  29. data/lib/site_maps/builder/normalizer.rb +48 -0
  30. data/lib/site_maps/builder/sitemap_index/item.rb +35 -0
  31. data/lib/site_maps/builder/sitemap_index.rb +40 -0
  32. data/lib/site_maps/builder/url.rb +152 -0
  33. data/lib/site_maps/builder/url_set.rb +92 -0
  34. data/lib/site_maps/cli.rb +68 -0
  35. data/lib/site_maps/configuration.rb +119 -0
  36. data/lib/site_maps/incremental_location.rb +62 -0
  37. data/lib/site_maps/notification/bus.rb +90 -0
  38. data/lib/site_maps/notification/event.rb +50 -0
  39. data/lib/site_maps/notification/publisher.rb +78 -0
  40. data/lib/site_maps/notification.rb +36 -0
  41. data/lib/site_maps/primitives/array.rb +15 -0
  42. data/lib/site_maps/primitives/output.rb +66 -0
  43. data/lib/site_maps/primitives/string.rb +43 -0
  44. data/lib/site_maps/process.rb +29 -0
  45. data/lib/site_maps/railtie.rb +18 -0
  46. data/lib/site_maps/runner/event_listener.rb +78 -0
  47. data/lib/site_maps/runner.rb +136 -0
  48. data/lib/site_maps/sitemap_builder.rb +75 -0
  49. data/lib/site_maps/sitemap_reader.rb +56 -0
  50. data/lib/site_maps/version.rb +5 -0
  51. data/lib/site_maps.rb +112 -0
  52. data/site_maps.gemspec +44 -0
  53. metadata +172 -0
@@ -0,0 +1,68 @@
1
+ require "thor"
2
+
3
module SiteMaps
  # Thor-based command line interface.
  #
  # Commands:
  # * `generate` - enqueue the requested processes on a runner and run it
  # * `version`  - print the gem version
  #
  # `help` is the default task when no command is given (see `default_task`
  # below). The earlier `default_command :start` declaration was removed: no
  # `start` command exists and the later `default_task :help` overrode it.
  class CLI < Thor
    method_option :debug, type: :boolean, default: false
    method_option :logfile, type: :string, default: nil
    method_option :pidfile, type: :string, default: nil
    method_option :config_file, type: :string, aliases: "-r", default: nil
    method_option :max_threads, type: :numeric, aliases: "-c", default: 4
    method_option :context, type: :hash, default: {}
    method_option :enqueue_remaining, type: :boolean, default: false

    desc "generate 1st_process,2nd_process ... ,Nth_process", "Generate sitemap.xml files for the given processes"

    # Build a runner from the CLI options, enqueue processes and run it.
    #
    # @param processes [String] comma-separated process names; when empty,
    #   every process is enqueued via `enqueue_all`
    # @return [Object] whatever `runner.run` returns
    def generate(processes = "")
      # Boot the host Rails application first when run from a Rails root.
      load_rails if rails_app?

      opts = (@options || {}).transform_keys(&:to_sym)
      if (logfile = opts[:logfile])
        SiteMaps.logger = Logger.new(logfile)
      end
      SiteMaps.logger.level = Logger::DEBUG if opts[:debug]

      SiteMaps::Notification.subscribe(SiteMaps::Runner::EventListener)

      runner = SiteMaps.generate(
        config_file: opts[:config_file],
        max_threads: opts[:max_threads]
      )
      if processes.empty?
        runner.enqueue_all
      else
        # The --context hash is forwarded to each process as keyword arguments.
        kwargs = (opts[:context] || {}).transform_keys(&:to_sym)
        processes.split(",").each do |process|
          runner.enqueue(process.strip.to_sym, **kwargs)
        end
      end
      runner.enqueue_remaining if opts[:enqueue_remaining]

      runner.run
    end

    desc "version", "Print the version"
    # Print the gem version to standard output.
    def version
      puts "SiteMaps version: #{SiteMaps::VERSION}"
    end

    default_task :help

    private

    # @return [Boolean] true when the working directory has config/application.rb
    def rails_app?
      File.exist?(File.join(Dir.pwd, "config", "application.rb"))
    end

    # Require the application found in the working directory, load this gem's
    # railtie, then load the Rails environment.
    def load_rails
      require File.expand_path(File.join(Dir.pwd, "config", "application.rb"))
      require_relative "railtie"

      ::Rails.application.require_environment!
    end
  end
end
@@ -0,0 +1,119 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps
  # Configuration object built from class-level attribute declarations.
  #
  # Each `attribute` declaration records a default and defines a reader, a
  # writer and a `?` predicate (unless a method of that name already exists).
  # Subclasses start with a copy of the parent's declarations.
  class Configuration
    class << self
      # @return [Hash{Symbol => Object}] declared attribute names and defaults
      def attributes
        @attributes || {}
      end

      # Declare a configuration attribute.
      #
      # @param name [Symbol] attribute name
      # @param default [Object, #call] default value; callables are invoked
      #   when an instance is built
      def attribute(name, default: nil)
        @attributes ||= {}
        @attributes[name] = default

        unless method_defined?(name)
          define_method(name) do
            instance_variable_get(:"@#{name}")
          end
        end

        unless method_defined?(:"#{name}=")
          define_method(:"#{name}=") do |value|
            instance_variable_set(:"@#{name}", value)
          end
        end

        unless method_defined?(:"#{name}?")
          define_method(:"#{name}?") do
            !!send(name)
          end
        end
      end

      # Give each subclass its own copy of the declared attributes.
      def inherited(subclass)
        super
        subclass.instance_variable_set(:@attributes, attributes.dup)
      end
    end

    attribute :url
    attribute :directory, default: "/tmp/sitemaps"

    # @param options [Hash] attribute values, merged over the non-nil defaults
    # @raise [ConfigurationError] when an option has no matching writer
    def initialize(**options)
      default_attributes.merge(options).each do |key, value|
        setter = :"#{key}="
        # Check for the writer up front instead of rescuing NoMethodError, so
        # a NoMethodError raised *inside* a writer is not misreported as an
        # unknown option.
        unless respond_to?(setter, true)
          raise ConfigurationError, <<~ERROR
            Unknown configuration option: #{key}
          ERROR
        end

        send(setter, value)
      end
    end

    # Build an instance of another configuration class from this one.
    #
    # @param klass [Class] target class
    # @param options [Hash] extra values, taking precedence over current ones
    # @return [Object] `klass.new` result
    def becomes(klass, **options)
      klass.new(**to_h, **options)
    end

    # @return [Hash{Symbol => Object}] every instance variable, keyed by name
    #   without the leading "@"
    def to_h
      instance_variables.each_with_object({}) do |var, hash|
        hash[var.to_s.delete("@").to_sym] = instance_variable_get(var)
      end
    end

    # @return [String] the configured sitemap URL
    # @raise [ConfigurationError] when no URL has been set
    def url
      @url || validate_url!
    end

    # @return [URI] the sitemap URL with path, query and fragment removed
    def base_uri
      ::URI.parse(url).tap do |uri|
        uri.path = ""
        uri.query = nil
        uri.fragment = nil
      end
    end

    # @return [Pathname] `directory` joined with the URL's file name
    def local_sitemap_path
      filename = ::File.basename(url)
      Pathname.new(directory).join(filename)
    end

    # Read the sitemap index (the local file when it exists, otherwise the
    # URL) and map its `<sitemap>` entries to index items.
    #
    # @return [Array<SiteMaps::Builder::SitemapIndex::Item>] empty when the
    #   reader raises `SiteMaps::SitemapReader::Error`
    def fetch_sitemap_index_links
      doc = SiteMaps::SitemapReader.new(local_sitemap_path.exist? ? local_sitemap_path : url).to_doc

      doc.css("sitemapindex sitemap").map do |node|
        SiteMaps::Builder::SitemapIndex::Item.new(
          node.at_css("loc").text,
          node.at_css("lastmod")&.text
        )
      end
    rescue SiteMaps::SitemapReader::Error
      []
    end

    # @return [String] directory part of the URL path, without a leading slash
    def remote_sitemap_directory
      path = ::URI.parse(url).path.delete_prefix("/")
      path.split("/")[0..-2].join("/")
    end

    private

    def validate_url!
      return if @url

      raise ConfigurationError, <<~ERROR
        You must set a sitemap URL in your configuration to use the add method.

        Example:
          SiteMaps.configure do |config|
            config.url = "https://example.com/sitemap.xml"
          end
      ERROR
    end

    # Resolve declared defaults (calling callables) and drop nil values.
    def default_attributes
      self.class.attributes.each_with_object({}) do |(key, default), hash|
        value = default.respond_to?(:call) ? default.call : default
        hash[key] = value unless value.nil?
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps
  # Produces numbered sitemap URLs for a process location.
  #
  # The location is resolved against the main sitemap URL and the current
  # index is inserted before the `.xml` / `.xml.gz` extension of the file name
  # (e.g. `sitemap.xml` -> `sitemap0.xml`, `sitemap1.xml`, ...).
  class IncrementalLocation
    FILENAME = "sitemap.xml"
    PLACEHOLDER = "%{index}"

    # @param main_url [String] URL of the main sitemap
    # @param process_location [String, nil] absolute URL, absolute path or
    #   path relative to the main sitemap's directory; nil uses `main_url`
    def initialize(main_url, process_location)
      @main_uri = URI(main_url)
      @index = Concurrent::AtomicFixnum.new(0)
      normalize(process_location || @main_uri.to_s)
    end

    # @return [String] URL for the current index
    def url
      placeholder_url % {index: @index.value}
    end

    # Advance to the next index.
    #
    # @return [self]
    def next
      @index.increment
      self
    end

    # @return [String] the main sitemap URL
    def main_url
      main_uri.to_s
    end

    # @return [String] directory of the resolved location, without the
    #   leading slash
    def relative_directory
      File.dirname(@uri.path).delete_prefix("/")
    end

    private

    attr_reader :main_uri, :placeholder_url

    def base_url
      main_uri.dup.tap { |uri| uri.path = "" }
    end

    def base_dir
      File.dirname(main_uri.path)
    end

    # Resolve `loc` to a full URI and derive `@uri` and `@placeholder_url`.
    def normalize(loc)
      uri = if %r{\Ahttps?://}.match?(loc)
        URI(loc)
      elsif loc.start_with?("/")
        main_uri.dup.tap { |copy| copy.path = loc }
      else
        main_uri.dup.tap { |copy| copy.path = File.join(base_dir, loc) }
      end

      # File.extname returns only the last extension (".gz" for "x.xml.gz"),
      # so compare against the end of the path; otherwise gzip locations
      # would wrongly get "/sitemap.xml" appended.
      unless uri.path.end_with?(".xml", ".xml.gz")
        uri.path = File.join(uri.path, FILENAME)
      end

      base = uri.dup.tap { |copy| copy.path = "" }.to_s
      directory = File.dirname(uri.path)
      basename = File.basename(uri.path)
      index_basename = basename.sub(/\.(xml\.gz|xml)\z/, "#{PLACEHOLDER}.\\1")

      @placeholder_url = File.join(base, File.join(directory, index_basename))
      @uri = URI(File.join(base, File.join(directory, basename)))
    end
  end
end
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps
  module Notification
    # Holds the registered events and, per event id, the callables subscribed
    # to it. Callables are either blocks or bound `Method` objects taken from
    # listener objects.
    class Bus
      attr_reader :listeners, :events

      def initialize
        @listeners = Concurrent::Hash.new { |h, k| h[k] = Concurrent::Array.new }
        @events = Concurrent::Hash.new
      end

      # Call every subscriber of `event_id` with an event carrying `payload`.
      #
      # @param event_id [String, Symbol]
      # @param payload [Hash]
      # @raise [UnregisteredEventError] when the event is not registered
      # @return [self]
      def publish(event_id, payload)
        raise UnregisteredEventError, event_id unless can_handle?(event_id)

        process(event_id, payload) do |event, listener|
          # Concurrent::Future.execute { listener.call(event) }
          listener.call(event)
        end
        self
      end

      # Subscribe a listener object: for each registered event whose
      # `listener_method` the object responds to, store that bound method.
      #
      # @return [self]
      def attach(listener)
        events.each do |id, event|
          method_name = event.listener_method
          next unless listener.respond_to?(method_name)

          listeners[id] << listener.method(method_name)
        end
        self
      end

      # Remove a listener from every event.
      #
      # Removes bound methods whose receiver is `listener`, and stored
      # callables equal to `listener` itself (e.g. a block). Callables without
      # a `receiver` (blocks) are skipped instead of raising NoMethodError,
      # and removal uses `delete_if` so the array is not mutated mid-iteration.
      #
      # @return [self]
      def unsubscribe(listener)
        listeners.each_value do |funcs|
          funcs.delete_if do |func|
            func == listener || (func.respond_to?(:receiver) && func.receiver == listener)
          end
        end
        self
      end
      alias_method :detach, :unsubscribe

      # Subscribe a block to an event id, or attach a listener object.
      #
      # @raise [InvalidSubscriberError] when the id is unregistered or the
      #   object handles no registered event
      # @return [self]
      def subscribe(object_or_event_id, &block)
        raise(InvalidSubscriberError, object_or_event_id) unless can_handle?(object_or_event_id)

        if block
          listeners[object_or_event_id] << block
        else
          attach(object_or_event_id)
        end

        self
      end

      # rubocop:disable Performance/RedundantEqualityComparisonBlock
      # @return [Boolean] true when `listener` itself is stored as a callable,
      #   or one of its event-handling methods is
      def subscribed?(listener)
        return true if listeners.values.any? { |funcs| funcs.any? { |func| func == listener } }

        bound = events.values.map(&:listener_method)
          .select { |method_name| listener.respond_to?(method_name) }
          .map { |method_name| listener.method(method_name) }
        listeners.values.any? { |funcs| (bound & funcs).size > 0 }
      end
      # rubocop:enable Performance/RedundantEqualityComparisonBlock

      # @return [Boolean] for a String/Symbol, whether that event id is
      #   registered; otherwise whether the object responds to any registered
      #   event's listener method
      def can_handle?(object_or_event_id)
        case object_or_event_id
        when String, Symbol
          events.key?(object_or_event_id)
        else
          events
            .values
            .map(&:listener_method)
            .any? { |method_name| object_or_event_id.respond_to?(method_name) }
        end
      end

      protected

      # Yield a fresh event (registered event + payload) for each subscriber.
      def process(event_id, payload)
        listeners[event_id].each do |listener|
          event = events[event_id].payload(payload)

          yield(event, listener)
        end
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps
  module Notification
    # An event identifier paired with a payload hash. Hash-style reads
    # (`[]`, `fetch`, `key?`, `to_h`/`to_hash`) are forwarded to the payload.
    class Event
      extend Forwardable

      attr_reader :id

      def_delegators :@payload, :[], :fetch, :to_h, :key?
      alias_method :to_hash, :to_h

      # Initialize a new event
      #
      # @param [Symbol, String] id The event identifier
      # @param [Hash] payload
      #
      # @return [Event]
      #
      # @api private
      def initialize(id, payload = {})
        @id, @payload = id, payload
      end

      # Read the payload, or derive a new event with extra payload data.
      #
      # @overload
      #   @return [Hash] the current payload
      #
      # @overload payload(data)
      #   @param [Hash] data Values merged over the current payload
      #   @return [Event] A new event with the same id and the merged payload
      #
      # @api public
      def payload(data = nil)
        return @payload unless data

        merged = @payload.merge(data)
        self.class.new(id, merged)
      end

      # Name of the method a listener object must respond to for this event:
      # "on_" followed by the id, underscored. Memoized.
      #
      # @api private
      def listener_method
        return @listener_method if @listener_method

        name = Primitives::String.new("on_#{id}")
        @listener_method = name.underscore.to_sym
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps
  module Notification
    # Mixin that gives the including class/module a class-level event bus with
    # register / publish / instrument / subscribe methods.
    module Publisher
      def self.included(klass)
        klass.extend(ClassMethods)
      end

      # Class interface for publishers
      #
      # @api public
      module ClassMethods
        extend Forwardable
        def_delegators :bus, :publish, :subscribed?, :unsubscribe

        # Register a new event type
        #
        # @param [Symbol,String] event_id The event identifier
        # @param [Hash] payload Optional default payload
        #
        # @return [self]
        #
        # @api public
        def register_event(event_id, payload = {})
          bus.events[event_id] = Event.new(event_id, payload)
          self
        end

        # Run the block and, when it completes normally, publish the event.
        #
        # The payload hash is yielded to the block (which may add to it) and
        # is mutated in place. `:runtime` is set to the elapsed seconds unless
        # the block already set it. The event is NOT published when the block
        # raises or exits non-locally.
        #
        # The internal `:__started_at__` marker is always removed from the
        # payload afterwards, so it never reaches subscribers and is not left
        # behind in the caller's hash on failure.
        #
        # @param [String] event_id The event identifier
        # @param [Hash] payload An optional payload
        # @raise [UnregisteredEventError] if the event is not registered
        # @return [Object] the block's return value
        #
        # @api public
        def instrument(event_id, payload = {})
          raise(UnregisteredEventError, event_id) unless bus.can_handle?(event_id)

          payload[:__started_at__] = Time.now
          begin
            result = yield(payload)
          ensure
            started_at = payload.delete(:__started_at__)
          end

          payload[:runtime] ||= Time.now - started_at if started_at
          bus.publish(event_id, payload)
          result
        end

        # Subscribe to events.
        #
        # @param [Symbol,String,Object] object_or_event_id The event identifier or a listener object
        # @yield [event] Called for each published event when a block is given
        #
        # @raise [SiteMaps::Notification::InvalidSubscriberError] if the event
        #   id is not registered or the object handles no registered event
        # @return [Object] self
        #
        # @api public
        def subscribe(object_or_event_id, &block)
          raise InvalidSubscriberError, object_or_event_id unless bus.can_handle?(object_or_event_id)

          if block
            bus.subscribe(object_or_event_id, &block)
          else
            bus.attach(object_or_event_id)
          end

          self
        end

        # @return [Bus] the lazily created bus owned by the extended object
        def bus
          @bus ||= Bus.new
        end
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps
  # Notification namespace: error classes, the publisher interface and the
  # events registered by the gem.
  module Notification
    Error = Class.new(SiteMaps::Error)

    # Raised when publishing an event id that was never registered. The id is
    # included in the message only when it is a String or Symbol.
    class UnregisteredEventError < Error
      def initialize(object_or_event_id)
        if object_or_event_id.is_a?(String) || object_or_event_id.is_a?(Symbol)
          super("You are trying to publish an unregistered event: `#{object_or_event_id}`")
        else
          super("You are trying to publish an unregistered event")
        end
      end
    end

    # Raised when subscribing to an unregistered event id, or with an object
    # that handles none of the registered events.
    class InvalidSubscriberError < Error
      def initialize(object_or_event_id)
        if object_or_event_id.is_a?(String) || object_or_event_id.is_a?(Symbol)
          super("you are trying to subscribe to an event: `#{object_or_event_id}` that has not been registered")
        else
          super("you try use subscriber object that will never be executed")
        end
      end
    end

    include Publisher

    # Events registered at load time.
    register_event("sitemaps.builder.finalize_urlset")
    register_event("sitemaps.runner.before_process_execution")
    register_event("sitemaps.runner.enqueue_process")
    register_event("sitemaps.runner.process_execution")
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
# Nested module definitions (rather than `module SiteMaps::Primitives`) so the
# file loads even when the namespace has not been defined yet.
module SiteMaps
  module Primitives
    class Array < ::Array
      # Coerce any object into an array.
      #
      # @param object [Object, nil]
      # @return [::Array] `[]` for nil; `object.to_ary` when the object
      #   responds to it and the result is non-nil; otherwise `[object]`
      def self.wrap(object)
        return [] if object.nil?
        return [object] unless object.respond_to?(:to_ary)

        object.to_ary || [object]
      end
    end
  end
end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ begin
4
+ require "rainbow"
5
+ rescue LoadError
6
+ end
7
+
8
module SiteMaps
  module Primitives
    # Helpers for writing messages to the console. Text is colorized through
    # Rainbow only when that constant is defined.
    module Output
      module_function

      # @param number [Numeric]
      # @return [String] the number formatted as "%.3f ms", colorized
      def formatted_runtime(number)
        colorize(sprintf("%.3f ms", number), :lightgray)
      end

      # @param number [Numeric]
      # @param extra [Integer] additional spaces
      # @return [String] spaces as wide as the formatted runtime plus `extra`
      def runtime_padding(number, extra = 2)
        " " * (extra + sprintf("%.3f ms", number).size)
      end

      # Apply each attribute, in order, as a method call on `Rainbow(text)`.
      #
      # @return [Object] the Rainbow result, or `text` unchanged when Rainbow
      #   is not defined
      def colorize(text, *attributes)
        return text unless defined?(Rainbow)

        attributes.reduce(Rainbow(text)) { |presenter, attribute| presenter.public_send(attribute) }
      end

      # Print a message or exception (via `to_s`), to stderr by default.
      #
      # @param message_or_error [#to_s]
      # @param backtrace [Boolean, Integer] truthy prints the exception's
      #   backtrace; an Integer caps the number of frames
      # @param options [Hash] forwarded to {print_message}; `:level` defaults
      #   to `:error`
      def print_error(message_or_error, backtrace: false, **options)
        options[:level] ||= :error
        print_message(message_or_error.to_s, output: :stderr, **options)
        return unless backtrace && message_or_error.is_a?(Exception)

        limit = backtrace.is_a?(Integer) ? backtrace : -1
        print_backtrace(message_or_error, limit: limit, level: options[:level])
      end

      # Print backtrace frames through {print_error}.
      #
      # @param error [#backtrace]
      # @param limit [Integer] maximum number of frames; negative means all.
      #   (Previously `backtrace[0..limit]` printed one frame more than asked.)
      def print_backtrace(error, limit: -1, **options)
        return unless error.respond_to?(:backtrace)

        frames = error.backtrace
        return if frames.nil?

        frames = frames.first(limit) if limit.is_a?(Integer) && limit >= 0
        frames.each { |frame| print_error(frame, **options) }
      end

      # Write a message to an output stream.
      #
      # @param message [String] text, optionally a format template for `fields`
      # @param level [Symbol] accepted but not used by this method
      # @param output [Symbol, String, IO, #puts] :stdout / :stderr (or their
      #   string forms), an IO, or any object with public `puts` and `print`
      # @param newline [Boolean] use `puts` when true, `print` otherwise
      # @param fields [Hash] values for named references in `message`
      # @raise [ArgumentError] when `output` is not a usable stream
      def print_message(message, level: :info, output: $stdout, newline: true, **fields)
        stream =
          case output
          when :stdout, "stdout" then $stdout
          when :stderr, "stderr" then $stderr
          else
            # Duck-typed so stream classes such as StringIO need not be loaded
            # here for the check itself.
            unless output.is_a?(IO) || (output.respond_to?(:puts) && output.respond_to?(:print))
              raise ArgumentError, "Invalid output #{output.inspect}"
            end

            output
          end

        text =
          begin
            format(message, **fields)
          rescue ArgumentError
            # Not a valid template for the given fields (e.g. an error message
            # or backtrace frame containing a literal "%"): print it verbatim.
            message
          end

        newline ? stream.puts(text) : stream.print(text)
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ begin
4
+ require "dry/inflector"
5
+ rescue LoadError
6
+ # noop
7
+ end
8
+
9
+ begin
10
+ require "active_support/inflector"
11
+ rescue LoadError
12
+ # noop
13
+ end
14
+
15
# Nested module definitions (rather than `module SiteMaps::Primitives`) so the
# file loads even when the namespace has not been defined yet.
module SiteMaps
  module Primitives
    # String subclass with the two inflections the gem needs.
    class String < ::String
      # Class-name form of the string. Uses Dry::Inflector when defined, then
      # ActiveSupport::Inflector, otherwise capitalizes each "_"-separated part.
      #
      # @return [SiteMaps::Primitives::String]
      def classify
        new_str = if defined?(Dry::Inflector)
          Dry::Inflector.new.classify(self)
        elsif defined?(ActiveSupport::Inflector)
          ActiveSupport::Inflector.classify(self)
        else
          split("_").map(&:capitalize).join
        end

        self.class.new(new_str)
      end

      # snake_case form of the string: strips a leading "::", turns "::" into
      # "/", splits CamelCase words, maps "-", "." and whitespace to "_",
      # collapses repeated underscores and downcases.
      #
      # @return [SiteMaps::Primitives::String]
      def underscore
        new_str = sub(/\A::/, "") # \A: only a "::" at the very start of the string
          .gsub("::", "/")
          .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
          .gsub(/([a-z\d])([A-Z])/, '\1_\2')
          .tr("-.", "_")
          .gsub(/\s/, "_")
          .squeeze("_")
          .downcase

        self.class.new(new_str)
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
module SiteMaps
  # Immutable description of a sitemap process: a name, an optional location
  # template, optional default keyword arguments and the block to run.
  Process = Concurrent::ImmutableStruct.new(:name, :location_template, :kwargs_template, :block) do
    # Fill the location template with the merged keyword arguments.
    #
    # @return [String, nil] nil when no location template is set
    def location(**kwargs)
      return nil unless location_template

      values = keyword_arguments(kwargs)
      location_template % values
    end

    # Invoke the block with the builder and the merged keyword arguments.
    #
    # @return [Object, nil] the block's result; nil when no block is set
    def call(builder, **kwargs)
      return nil unless block

      values = keyword_arguments(kwargs)
      block.call(builder, **values)
    end

    # @return [Boolean] true when the process is not dynamic
    def static?
      dynamic? ? false : true
    end

    # @return [Boolean] true when `kwargs_template` is a non-empty Hash
    def dynamic?
      return false unless kwargs_template.is_a?(Hash)

      !kwargs_template.empty?
    end

    # Merge the given arguments over the template defaults.
    #
    # @param given [Hash, nil]
    # @return [Hash]
    def keyword_arguments(given)
      defaults = kwargs_template || {}
      overrides = given || {}
      defaults.merge(overrides)
    end
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "singleton"
4
+ Kernel.require "rails/railtie"
5
+
6
module SiteMaps
  # Rails integration: prepends a `route` method to the base adapter that
  # returns a singleton object including the application's URL helpers.
  class Railtie < ::Rails::Railtie
    initializer "site_maps.named_routes" do
      # Anonymous singleton class carrying the app's route helpers.
      route_helpers = Class.new do
        include Singleton
        include ::Rails.application.routes.url_helpers
      end

      # Module exposing that singleton as `route`.
      route_accessor = Module.new do
        define_method(:route) { route_helpers.instance }
      end

      SiteMaps::Adapters::Adapter.prepend(route_accessor)
    end
  end
end