karafka 1.2.0

Files changed (86)
  1. checksums.yaml +7 -0
  2. data/.console_irbrc +13 -0
  3. data/.gitignore +68 -0
  4. data/.rspec +1 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +18 -0
  8. data/CHANGELOG.md +415 -0
  9. data/CODE_OF_CONDUCT.md +46 -0
  10. data/CONTRIBUTING.md +41 -0
  11. data/Gemfile +11 -0
  12. data/Gemfile.lock +123 -0
  13. data/MIT-LICENCE +18 -0
  14. data/README.md +89 -0
  15. data/bin/karafka +19 -0
  16. data/config/errors.yml +6 -0
  17. data/karafka.gemspec +37 -0
  18. data/lib/karafka.rb +78 -0
  19. data/lib/karafka/app.rb +45 -0
  20. data/lib/karafka/attributes_map.rb +67 -0
  21. data/lib/karafka/backends/inline.rb +16 -0
  22. data/lib/karafka/base_consumer.rb +68 -0
  23. data/lib/karafka/base_responder.rb +204 -0
  24. data/lib/karafka/callbacks.rb +30 -0
  25. data/lib/karafka/callbacks/config.rb +22 -0
  26. data/lib/karafka/callbacks/dsl.rb +16 -0
  27. data/lib/karafka/cli.rb +54 -0
  28. data/lib/karafka/cli/base.rb +78 -0
  29. data/lib/karafka/cli/console.rb +29 -0
  30. data/lib/karafka/cli/flow.rb +46 -0
  31. data/lib/karafka/cli/info.rb +29 -0
  32. data/lib/karafka/cli/install.rb +42 -0
  33. data/lib/karafka/cli/server.rb +66 -0
  34. data/lib/karafka/connection/client.rb +117 -0
  35. data/lib/karafka/connection/config_adapter.rb +120 -0
  36. data/lib/karafka/connection/delegator.rb +46 -0
  37. data/lib/karafka/connection/listener.rb +60 -0
  38. data/lib/karafka/consumers/callbacks.rb +54 -0
  39. data/lib/karafka/consumers/includer.rb +51 -0
  40. data/lib/karafka/consumers/responders.rb +24 -0
  41. data/lib/karafka/consumers/single_params.rb +15 -0
  42. data/lib/karafka/errors.rb +50 -0
  43. data/lib/karafka/fetcher.rb +44 -0
  44. data/lib/karafka/helpers/class_matcher.rb +78 -0
  45. data/lib/karafka/helpers/config_retriever.rb +46 -0
  46. data/lib/karafka/helpers/multi_delegator.rb +33 -0
  47. data/lib/karafka/instrumentation/listener.rb +112 -0
  48. data/lib/karafka/instrumentation/logger.rb +55 -0
  49. data/lib/karafka/instrumentation/monitor.rb +64 -0
  50. data/lib/karafka/loader.rb +28 -0
  51. data/lib/karafka/params/dsl.rb +156 -0
  52. data/lib/karafka/params/params_batch.rb +46 -0
  53. data/lib/karafka/parsers/json.rb +38 -0
  54. data/lib/karafka/patches/dry_configurable.rb +35 -0
  55. data/lib/karafka/patches/ruby_kafka.rb +34 -0
  56. data/lib/karafka/persistence/client.rb +25 -0
  57. data/lib/karafka/persistence/consumer.rb +38 -0
  58. data/lib/karafka/persistence/topic.rb +29 -0
  59. data/lib/karafka/process.rb +64 -0
  60. data/lib/karafka/responders/builder.rb +36 -0
  61. data/lib/karafka/responders/topic.rb +57 -0
  62. data/lib/karafka/routing/builder.rb +61 -0
  63. data/lib/karafka/routing/consumer_group.rb +61 -0
  64. data/lib/karafka/routing/consumer_mapper.rb +34 -0
  65. data/lib/karafka/routing/proxy.rb +37 -0
  66. data/lib/karafka/routing/router.rb +29 -0
  67. data/lib/karafka/routing/topic.rb +60 -0
  68. data/lib/karafka/routing/topic_mapper.rb +55 -0
  69. data/lib/karafka/schemas/config.rb +24 -0
  70. data/lib/karafka/schemas/consumer_group.rb +77 -0
  71. data/lib/karafka/schemas/consumer_group_topic.rb +18 -0
  72. data/lib/karafka/schemas/responder_usage.rb +39 -0
  73. data/lib/karafka/schemas/server_cli_options.rb +43 -0
  74. data/lib/karafka/server.rb +94 -0
  75. data/lib/karafka/setup/config.rb +189 -0
  76. data/lib/karafka/setup/configurators/base.rb +29 -0
  77. data/lib/karafka/setup/configurators/params.rb +25 -0
  78. data/lib/karafka/setup/configurators/water_drop.rb +32 -0
  79. data/lib/karafka/setup/dsl.rb +22 -0
  80. data/lib/karafka/status.rb +25 -0
  81. data/lib/karafka/templates/application_consumer.rb.example +6 -0
  82. data/lib/karafka/templates/application_responder.rb.example +11 -0
  83. data/lib/karafka/templates/karafka.rb.example +54 -0
  84. data/lib/karafka/version.rb +7 -0
  85. data/log/.gitkeep +0 -0
  86. metadata +301 -0
data/lib/karafka/connection/config_adapter.rb
@@ -0,0 +1,120 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Namespace for all the things related to Kafka connection
+  module Connection
+    # Mapper used to convert our internal settings into ruby-kafka settings
+    # Since ruby-kafka has more and more options and there are a few "levels" on which
+    # we have to apply them (despite the fact that in Karafka you configure all of it
+    # in one place), we have to remap it into what the ruby-kafka driver requires
+    # @note The good thing about the Kafka.new method is that it ignores all options that
+    #   do nothing. So we don't have to worry about injecting our internal settings
+    #   into the client and breaking stuff
+    module ConfigAdapter
+      class << self
+        # Builds all the configuration settings for the Kafka.new method
+        # @param _consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
+        # @return [Array<Hash>] Array with all the client arguments including a hash with
+        #   all the settings required by the Kafka.new method
+        # @note We return an array, so we can inject any arguments we want, in case of
+        #   changes in the raw driver
+        def client(_consumer_group)
+          # This one is a default that takes all the settings except special
+          # cases defined in the map
+          settings = {
+            logger: ::Karafka.logger,
+            client_id: ::Karafka::App.config.client_id
+          }
+
+          kafka_configs.each do |setting_name, setting_value|
+            # All options for the config adapter should be ignored as we're just interested
+            # in what is left, as we want to pass all the options that are "typical"
+            # and not listed in the config_adapter special cases mapping. All the values
+            # from the config_adapter mapping go somewhere else, not to the client directly
+            next if AttributesMap.config_adapter.values.flatten.include?(setting_name)
+
+            settings[setting_name] = setting_value
+          end
+
+          settings_hash = sanitize(settings)
+
+          # Normalization for the way Kafka::Client accepts arguments from 0.5.3
+          [settings_hash.delete(:seed_brokers), settings_hash]
+        end
+
+        # Builds all the configuration settings for the kafka#consumer method
+        # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
+        # @return [Array<Hash>] array with all the consumer arguments including a hash with
+        #   all the settings required by Kafka#consumer
+        def consumer(consumer_group)
+          settings = { group_id: consumer_group.id }
+          settings = fetch_for(:consumer, consumer_group, settings)
+          [sanitize(settings)]
+        end
+
+        # Builds all the configuration settings for the kafka consumer consume_each_batch
+        # and consume_each_message methods
+        # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
+        # @return [Array<Hash>] Array with all the arguments required by the consuming
+        #   method, including a hash with all the settings required by
+        #   Kafka::Consumer#consume_each_message and Kafka::Consumer#consume_each_batch
+        def consuming(consumer_group)
+          settings = {
+            automatically_mark_as_processed: consumer_group.automatically_mark_as_consumed
+          }
+          [sanitize(fetch_for(:consuming, consumer_group, settings))]
+        end
+
+        # Builds all the configuration settings for the kafka consumer#subscribe method
+        # @param topic [Karafka::Routing::Topic] topic that holds details for a given subscription
+        # @return [Hash] hash with all the settings required by the kafka consumer#subscribe method
+        def subscription(topic)
+          settings = fetch_for(:subscription, topic)
+          [Karafka::App.config.topic_mapper.outgoing(topic.name), sanitize(settings)]
+        end
+
+        # Builds all the configuration settings required by the kafka consumer#pause method
+        # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
+        # @return [Hash] hash with all the settings required to pause a kafka consumer
+        def pausing(consumer_group)
+          { timeout: consumer_group.pause_timeout }
+        end
+
+        private
+
+        # Fetches proper settings for a given map namespace
+        # @param namespace_key [Symbol] namespace from the attributes map config adapter hash
+        # @param route_layer [Object] route topic or consumer group
+        # @param preexisting_settings [Hash] hash with some preexisting settings that might
+        #   have been loaded in a different way
+        def fetch_for(namespace_key, route_layer, preexisting_settings = {})
+          kafka_configs.each_key do |setting_name|
+            # Ignore settings that are not related to our namespace
+            next unless AttributesMap.config_adapter[namespace_key].include?(setting_name)
+            # Ignore settings that are already initialized,
+            # in case they are in preexisting settings fetched differently
+            next if preexisting_settings.keys.include?(setting_name)
+            # Fetch all the settings from a given layer object. Objects can handle the fallback
+            # to the kafka settings, so we don't need to worry about that here
+            preexisting_settings[setting_name] = route_layer.send(setting_name)
+          end
+
+          preexisting_settings
+        end
+
+        # Removes nil-valued keys from the final settings so the Kafka driver
+        # defaults can be used for those
+        # @param settings [Hash] settings that may contain nil values
+        # @return [Hash] settings without nil-valued keys (none of the karafka options
+        #   should be nil)
+        def sanitize(settings)
+          settings.reject { |_key, value| value.nil? }
+        end
+
+        # @return [Hash] Kafka config details as a hash
+        def kafka_configs
+          ::Karafka::App.config.kafka.to_h
+        end
+      end
+    end
+  end
+end
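
A minimal sketch of how these adapter methods are meant to be consumed, mirroring what `Karafka::Connection::Client` does internally. It assumes a booted Karafka app, from which `consumer_group` is a `Karafka::Routing::ConsumerGroup` taken out of the routing:

```ruby
# Both adapter methods return arrays, so they can be splatted straight into
# the ruby-kafka calls they were shaped for
consumer_group = Karafka::App.consumer_groups.first

# #client returns [seed_brokers, settings_hash] - the Kafka.new argument list
kafka = Kafka.new(*Karafka::Connection::ConfigAdapter.client(consumer_group))

# #consumer returns a one-element array wrapping the Kafka#consumer settings hash
kafka_consumer = kafka.consumer(*Karafka::Connection::ConfigAdapter.consumer(consumer_group))
```
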
data/lib/karafka/connection/delegator.rb
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Class that delegates processing of messages for which we listen to a proper processor
+    module Delegator
+      class << self
+        # Delegates messages (does something with them)
+        # It will either schedule or run a proper processor action for the messages
+        # @note This should be looped to obtain constant delegation of new messages
+        # @note We catch all the errors here, to make sure that no failures
+        #   for a given consumption will affect other consumed messages
+        #   If we didn't catch them, they would propagate up until killing the thread
+        # @note It is one huge method, for performance reasons. It is much faster than
+        #   using send or invoking additional methods
+        # @param group_id [String] group_id of a group from which a given message came
+        # @param kafka_messages [Array<Kafka::FetchedMessage>] raw messages fetched from kafka
+        def call(group_id, kafka_messages)
+          # @note We always get messages by topic and partition so we can take the topic
+          #   from the first one and it will be valid for all the messages
+          topic = Persistence::Topic.fetch(group_id, kafka_messages[0].topic)
+          consumer = Persistence::Consumer.fetch(topic, kafka_messages[0].partition)
+
+          Karafka.monitor.instrument(
+            'connection.delegator.call',
+            caller: self,
+            consumer: consumer,
+            kafka_messages: kafka_messages
+          ) do
+            # Depending on the case (persisted or not) we might use a new consumer instance
+            # per batch, or use the same one for all of them (for implementing buffering, etc.)
+            if topic.batch_consuming
+              consumer.params_batch = kafka_messages
+              consumer.call
+            else
+              kafka_messages.each do |kafka_message|
+                consumer.params_batch = [kafka_message]
+                consumer.call
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end
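
An illustrative view of the two delegation modes described above (the consumer class name and body are assumptions, not part of the gem): with `batch_consuming true` in the routing, the delegator calls the consumer once with the whole batch; with `batch_consuming false`, it calls the consumer once per message, so `params_batch` always wraps a single message:

```ruby
class EventsConsumer < ApplicationConsumer
  def consume
    # With batch_consuming true this iterates over the whole fetched batch;
    # with batch_consuming false it sees exactly one params object per #consume call
    params_batch.each do |params|
      # process a single message here
    end
  end
end
```
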
data/lib/karafka/connection/listener.rb
@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # A single listener that listens to incoming messages from a single route
+    # @note It does not loop on itself - it needs to be executed in a loop
+    # @note The listener itself does nothing with the message - it will return to the block
+    #   a raw Kafka::FetchedMessage
+    class Listener
+      # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group that holds details
+      #   on what topics and with what settings we should listen
+      # @return [Karafka::Connection::Listener] listener instance
+      def initialize(consumer_group)
+        @consumer_group = consumer_group
+      end
+
+      # Runs prefetch callbacks and executes the main listener fetch loop
+      def call
+        Karafka::Callbacks.before_fetch_loop(
+          @consumer_group,
+          client
+        )
+        fetch_loop
+      end
+
+      private
+
+      # Opens a connection, gets messages and calls a block for each of the incoming messages
+      # @yieldparam [String] consumer group id
+      # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
+      # @note This will yield with a raw message - no preprocessing or reformatting
+      # @note We catch all the errors here, so they don't affect other listeners (or this one),
+      #   and we will be able to listen to and consume other incoming messages.
+      #   Since it is run inside Karafka::Connection::ActorCluster - catching all the exceptions
+      #   won't crash the whole cluster. Here we mostly focus on catching the exceptions related
+      #   to Kafka connection / Internet connection issues / etc. Business logic problems should
+      #   not propagate this far
+      def fetch_loop
+        client.fetch_loop do |raw_messages|
+          # @note What happens here is a delegation of processing to a proper processor based
+          #   on the incoming messages characteristics
+          Karafka::Connection::Delegator.call(@consumer_group.id, raw_messages)
+        end
+        # This is on purpose - see the notes for this method
+        # rubocop:disable RescueException
+      rescue Exception => e
+        Karafka.monitor.instrument('connection.listener.fetch_loop.error', caller: self, error: e)
+        # rubocop:enable RescueException
+        @client&.stop
+        retry if @client
+      end
+
+      # @return [Karafka::Connection::Client] wrapped kafka consuming client for a given topic
+      #   consumption
+      def client
+        @client ||= Client.new(@consumer_group)
+      end
+    end
+  end
+end
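
Since the listener only publishes errors instead of handling them, a monitor subscription is the place to react to them. A sketch, assuming the default dry-monitor based `Karafka.monitor` and its `#subscribe` API:

```ruby
# Log every fetch loop failure; the listener itself will stop its client and retry
Karafka.monitor.subscribe('connection.listener.fetch_loop.error') do |event|
  Karafka.logger.error "Listener fetch loop error: #{event[:error]}"
end
```
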
data/lib/karafka/consumers/callbacks.rb
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Consumers
+    # Additional callbacks that can be used to trigger some actions at certain moments like
+    # manual offset management, committing or anything else outside of the standard messages flow
+    # They are not included by default, as we don't want to provide functionalities that are
+    # not required by users by default
+    # Please refer to the wiki callbacks page for more details on how to use them
+    module Callbacks
+      # Types of events on which we run callbacks
+      TYPES = %i[
+        after_fetch
+        after_poll
+        before_poll
+        before_stop
+      ].freeze
+
+      # Class methods needed to make callbacks run
+      module ClassMethods
+        TYPES.each do |type|
+          # Creates a callback wrapper
+          # @param method_name [Symbol, String] method name or nil if we plan to provide a block
+          # @yield A block with the code that should be executed before scheduling
+          define_method type do |method_name = nil, &block|
+            set_callback type, :before, method_name ? method_name : block
+          end
+        end
+      end
+
+      # @param consumer_class [Class] consumer class that we extend with callbacks
+      def self.included(consumer_class)
+        consumer_class.class_eval do
+          extend ClassMethods
+          include ActiveSupport::Callbacks
+
+          # The call method is wrapped with a set of callbacks
+          # We won't run process if any of the callbacks throws :abort
+          # @see http://api.rubyonrails.org/classes/ActiveSupport/Callbacks/ClassMethods.html#method-i-get_callbacks
+          TYPES.each { |type| define_callbacks type }
+        end
+      end
+
+      # Executes the default consumer flow, runs callbacks and if not halted will call the
+      # process method of a proper backend. It is here because it interacts with the default
+      # Karafka call flow and needs to be overwritten to support callbacks
+      def call
+        run_callbacks :after_fetch do
+          process
+        end
+      end
+    end
+  end
+end
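
A sketch of a consumer opting into these callbacks (the class name and logging are illustrative). The module is not included by default, so it has to be pulled in explicitly:

```ruby
class AuditConsumer < Karafka::BaseConsumer
  include Karafka::Consumers::Callbacks

  # Runs before #consume for every fetched batch; throwing :abort here
  # would halt processing
  after_fetch do
    Karafka.logger.debug "after_fetch: #{params_batch.to_a.size} message(s)"
  end

  def consume
    # regular processing
  end
end
```
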
data/lib/karafka/consumers/includer.rb
@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Additional functionalities for consumers
+  module Consumers
+    # Module used to inject functionalities into a given consumer class, based on the consumer
+    # topic and its settings
+    # We don't need all the behaviors in all the cases, so it is not worth having everything
+    # in all the cases all the time
+    module Includer
+      class << self
+        # @param consumer_class [Class] consumer class that will get some functionalities
+        #   based on the topic under which it operates
+        def call(consumer_class)
+          topic = consumer_class.topic
+
+          bind_backend(consumer_class, topic)
+          bind_params(consumer_class, topic)
+          bind_responders(consumer_class, topic)
+        end
+
+        private
+
+        # Figures out the backend for a given consumer class, based on the topic backend, and
+        # includes it into the consumer class
+        # @param consumer_class [Class] consumer class
+        # @param topic [Karafka::Routing::Topic] topic of a consumer class
+        def bind_backend(consumer_class, topic)
+          backend = Kernel.const_get("::Karafka::Backends::#{topic.backend.to_s.capitalize}")
+          consumer_class.include backend
+        end
+
+        # Adds a single #params support for non-batch processed topics
+        # @param consumer_class [Class] consumer class
+        # @param topic [Karafka::Routing::Topic] topic of a consumer class
+        def bind_params(consumer_class, topic)
+          return if topic.batch_consuming
+          consumer_class.include SingleParams
+        end
+
+        # Adds responders support for topics and consumers with responders defined for them
+        # @param consumer_class [Class] consumer class
+        # @param topic [Karafka::Routing::Topic] topic of a consumer class
+        def bind_responders(consumer_class, topic)
+          return unless topic.responder
+          consumer_class.include Responders
+        end
+      end
+    end
+  end
+end
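
A routing sketch that exercises each of the three bindings (all class names are illustrative):

```ruby
class KarafkaApp < Karafka::App
  consumer_groups.draw do
    topic :orders do
      consumer OrdersConsumer
      backend :inline            # bind_backend includes Karafka::Backends::Inline
      batch_consuming false      # bind_params includes SingleParams (adds #params)
      responder OrdersResponder  # bind_responders includes Responders (adds #respond_with)
    end
  end
end
```
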
data/lib/karafka/consumers/responders.rb
@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Consumers
+    # Feature that allows us to use the responders flow in consumers
+    module Responders
+      # Responds with given data using the given responder. This allows us to have a similar
+      # way of defining flows like synchronous protocols
+      # @param data Anything we want to pass to the responder, based on which we want to
+      #   trigger further Kafka responding
+      def respond_with(*data)
+        Karafka.monitor.instrument(
+          'consumers.responders.respond_with',
+          caller: self,
+          data: data
+        ) do
+          # @note we build a new instance of the responder each time, as long-running
+          #   (persisted) consumers can respond multiple times during their lifecycle
+          topic.responder.new(topic.parser).call(*data)
+        end
+      end
+    end
+  end
+end
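
A sketch of the full `respond_with` flow, assuming the routing declares `responder PongResponder` for the consumer's topic and a non-batched topic so `#params` is available (class names are illustrative):

```ruby
class PongResponder < ApplicationResponder
  topic :pong

  def respond(data)
    respond_to :pong, data
  end
end

class PingConsumer < ApplicationConsumer
  def consume
    # Builds a PongResponder instance and delivers data to the :pong topic
    respond_with(params['data'])
  end
end
```
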
data/lib/karafka/consumers/single_params.rb
@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Consumers
+    # Params alias for single message consumption consumers
+    module SingleParams
+      private
+
+      # @return [Karafka::Params::Params] params instance for non-batch consumption consumers
+      def params
+        params_batch.first
+      end
+    end
+  end
+end
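
In practice this means that, for a topic with `batch_consuming false`, the two calls below are interchangeable inside a consumer (sketch, names illustrative):

```ruby
class StatusConsumer < ApplicationConsumer
  def consume
    params             # the SingleParams alias
    params_batch.first # what it delegates to
  end
end
```
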
data/lib/karafka/errors.rb
@@ -0,0 +1,50 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Namespace used to encapsulate all the internal errors of Karafka
+  module Errors
+    # Base class for all the Karafka internal errors
+    BaseError = Class.new(StandardError)
+
+    # Should be raised when we attempt to parse incoming params but parsing fails
+    # If this error (or its descendant) is detected, we will pass the raw message
+    # into params and proceed further
+    ParserError = Class.new(BaseError)
+
+    # Raised when the router receives a topic name which does not correspond to any routes
+    # This can only happen when:
+    # - you've received a message and we cannot match it with a consumer
+    # - you've changed the routing, so the router can no longer associate your topic with
+    #   any consumer
+    # - or in a case when you do a lot of metaprogramming and change routing/etc at runtime
+    #
+    # In case this happens, you will have to create a temporary route that will allow
+    # you to "eat" everything from the Sidekiq queue.
+    # @see https://github.com/karafka/karafka/issues/135
+    NonMatchingRouteError = Class.new(BaseError)
+
+    # Raised when we don't use a responder, or use it in a way other than expected, based
+    # on the topics usage definitions
+    InvalidResponderUsage = Class.new(BaseError)
+
+    # Raised when the options that we provide to the responder to respond aren't what the
+    # schema requires
+    InvalidResponderMessageOptions = Class.new(BaseError)
+
+    # Raised when the configuration doesn't match the validation schema
+    InvalidConfiguration = Class.new(BaseError)
+
+    # Raised when we try to use Karafka CLI commands (except install) without a boot file
+    MissingBootFile = Class.new(BaseError)
+
+    # Raised when we want to read a persisted thread messages consumer but it is unavailable
+    # This should never happen and if it does, please contact us
+    MissingClient = Class.new(BaseError)
+
+    # Raised when we attempt to pause a partition but the pause timeout is equal to 0
+    InvalidPauseTimeout = Class.new(BaseError)
+
+    # Raised when we want to hook up to an event that is not registered and supported
+    UnregisteredMonitorEvent = Class.new(BaseError)
+  end
+end
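
A sketch of a custom parser honoring the `ParserError` contract described above: raising `Karafka::Errors::ParserError` on failure tells Karafka to keep the raw message in params and proceed. The XML handling is an assumption for illustration (it relies on ActiveSupport's `Hash.from_xml`):

```ruby
class XmlParser
  # @param message [String] raw message payload
  # @return [Hash] parsed XML payload
  def self.parse(message)
    Hash.from_xml(message)
  rescue REXML::ParseException => e
    # Signal Karafka to fall back to the raw payload instead of crashing the flow
    raise Karafka::Errors::ParserError, e
  end
end
```
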
data/lib/karafka/fetcher.rb
@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Class used to run the Karafka consumer and handle shutting down, restarting etc
+  # @note Creating multiple fetchers will result in having multiple connections to the same
+  #   topics, which means that if there are not enough partitions, some connections will sit idle
+  class Fetcher
+    class << self
+      # Starts listening on all the listeners asynchronously
+      # The fetch loop should never end, which means that we won't create more actor clusters,
+      # so we don't have to terminate them
+      def call
+        threads = listeners.map do |listener|
+          # We abort on exception because there should be exception handling developed for
+          # each listener running in a separate thread, so the exceptions should never leak,
+          # and if that happens, it means that something really bad happened and we should
+          # stop the whole process
+          Thread
+            .new { listener.call }
+            .tap { |thread| thread.abort_on_exception = true }
+        end
+
+        # We aggregate threads here for a supervised shutdown process
+        threads.each { |thread| Karafka::Server.consumer_threads << thread }
+        threads.each(&:join)
+      # If anything crashes here, we need to raise the error and crash the runner because it
+      # means that something terrible happened
+      rescue StandardError => e
+        Karafka.monitor.instrument('fetcher.call.error', caller: self, error: e)
+        Karafka::App.stop!
+        raise e
+      end
+
+      private
+
+      # @return [Array<Karafka::Connection::Listener>] listeners that will consume messages
+      def listeners
+        @listeners ||= App.consumer_groups.active.map do |consumer_group|
+          Karafka::Connection::Listener.new(consumer_group)
+        end
+      end
+    end
+  end
+end
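
A simplified sketch of where the fetcher sits in the boot sequence - roughly what `Karafka::Server` does under its supervision logic:

```ruby
# Assumes karafka.rb has been loaded and the routing is set up
Karafka::App.run!     # flip the app status to running
Karafka::Fetcher.call # blocks, joining all listener threads until shutdown
```
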