karafka 1.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. checksums.yaml +7 -0
  2. data/.coditsu.yml +3 -0
  3. data/.console_irbrc +13 -0
  4. data/.gitignore +68 -0
  5. data/.rspec +1 -0
  6. data/.ruby-gemset +1 -0
  7. data/.ruby-version +1 -0
  8. data/.travis.yml +49 -0
  9. data/CHANGELOG.md +458 -0
  10. data/CODE_OF_CONDUCT.md +46 -0
  11. data/CONTRIBUTING.md +41 -0
  12. data/Gemfile +15 -0
  13. data/Gemfile.lock +126 -0
  14. data/MIT-LICENCE +18 -0
  15. data/README.md +102 -0
  16. data/bin/karafka +19 -0
  17. data/config/errors.yml +6 -0
  18. data/karafka.gemspec +42 -0
  19. data/lib/karafka.rb +79 -0
  20. data/lib/karafka/app.rb +45 -0
  21. data/lib/karafka/attributes_map.rb +69 -0
  22. data/lib/karafka/backends/inline.rb +16 -0
  23. data/lib/karafka/base_consumer.rb +68 -0
  24. data/lib/karafka/base_responder.rb +208 -0
  25. data/lib/karafka/callbacks.rb +30 -0
  26. data/lib/karafka/callbacks/config.rb +22 -0
  27. data/lib/karafka/callbacks/dsl.rb +16 -0
  28. data/lib/karafka/cli.rb +54 -0
  29. data/lib/karafka/cli/base.rb +78 -0
  30. data/lib/karafka/cli/console.rb +29 -0
  31. data/lib/karafka/cli/flow.rb +46 -0
  32. data/lib/karafka/cli/info.rb +29 -0
  33. data/lib/karafka/cli/install.rb +42 -0
  34. data/lib/karafka/cli/server.rb +66 -0
  35. data/lib/karafka/connection/api_adapter.rb +148 -0
  36. data/lib/karafka/connection/builder.rb +16 -0
  37. data/lib/karafka/connection/client.rb +107 -0
  38. data/lib/karafka/connection/delegator.rb +46 -0
  39. data/lib/karafka/connection/listener.rb +60 -0
  40. data/lib/karafka/consumers/callbacks.rb +54 -0
  41. data/lib/karafka/consumers/includer.rb +51 -0
  42. data/lib/karafka/consumers/responders.rb +24 -0
  43. data/lib/karafka/consumers/single_params.rb +15 -0
  44. data/lib/karafka/errors.rb +50 -0
  45. data/lib/karafka/fetcher.rb +44 -0
  46. data/lib/karafka/helpers/class_matcher.rb +78 -0
  47. data/lib/karafka/helpers/config_retriever.rb +46 -0
  48. data/lib/karafka/helpers/multi_delegator.rb +33 -0
  49. data/lib/karafka/instrumentation/listener.rb +112 -0
  50. data/lib/karafka/instrumentation/logger.rb +55 -0
  51. data/lib/karafka/instrumentation/monitor.rb +64 -0
  52. data/lib/karafka/loader.rb +28 -0
  53. data/lib/karafka/params/dsl.rb +158 -0
  54. data/lib/karafka/params/params_batch.rb +46 -0
  55. data/lib/karafka/parsers/json.rb +38 -0
  56. data/lib/karafka/patches/dry_configurable.rb +33 -0
  57. data/lib/karafka/patches/ruby_kafka.rb +34 -0
  58. data/lib/karafka/persistence/client.rb +25 -0
  59. data/lib/karafka/persistence/consumer.rb +38 -0
  60. data/lib/karafka/persistence/topic.rb +29 -0
  61. data/lib/karafka/process.rb +62 -0
  62. data/lib/karafka/responders/builder.rb +36 -0
  63. data/lib/karafka/responders/topic.rb +57 -0
  64. data/lib/karafka/routing/builder.rb +61 -0
  65. data/lib/karafka/routing/consumer_group.rb +61 -0
  66. data/lib/karafka/routing/consumer_mapper.rb +34 -0
  67. data/lib/karafka/routing/proxy.rb +37 -0
  68. data/lib/karafka/routing/router.rb +29 -0
  69. data/lib/karafka/routing/topic.rb +60 -0
  70. data/lib/karafka/routing/topic_mapper.rb +55 -0
  71. data/lib/karafka/schemas/config.rb +24 -0
  72. data/lib/karafka/schemas/consumer_group.rb +78 -0
  73. data/lib/karafka/schemas/consumer_group_topic.rb +18 -0
  74. data/lib/karafka/schemas/responder_usage.rb +39 -0
  75. data/lib/karafka/schemas/server_cli_options.rb +43 -0
  76. data/lib/karafka/server.rb +85 -0
  77. data/lib/karafka/setup/config.rb +193 -0
  78. data/lib/karafka/setup/configurators/base.rb +29 -0
  79. data/lib/karafka/setup/configurators/params.rb +25 -0
  80. data/lib/karafka/setup/configurators/water_drop.rb +32 -0
  81. data/lib/karafka/setup/dsl.rb +22 -0
  82. data/lib/karafka/status.rb +25 -0
  83. data/lib/karafka/templates/application_consumer.rb.example +6 -0
  84. data/lib/karafka/templates/application_responder.rb.example +11 -0
  85. data/lib/karafka/templates/karafka.rb.example +54 -0
  86. data/lib/karafka/version.rb +7 -0
  87. data/log/.gitkeep +0 -0
  88. metadata +303 -0
data/lib/karafka/app.rb
@@ -0,0 +1,45 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # App class
+   class App
+     extend Setup::Dsl
+     extend Callbacks::Dsl
+
+     class << self
+       # Sets up all the internal components and bootstraps the whole app
+       # We need to know details about consumers in order to set up components,
+       # that's why we don't set them up until the standard setup is done
+       # @raise [Karafka::Errors::InvalidConfiguration] raised when configuration
+       #   doesn't match with ConfigurationSchema
+       def boot!
+         Setup::Config.validate!
+         Setup::Config.setup_components
+         Callbacks.after_init(Karafka::App.config)
+       end
+
+       # @return [Karafka::Routing::Builder] consumers builder instance
+       def consumer_groups
+         Routing::Builder.instance
+       end
+
+       Status.instance_methods(false).each do |delegated|
+         define_method(delegated) do
+           Status.instance.send(delegated)
+         end
+       end
+
+       # Methods that should be delegated to Karafka module
+       %i[
+         root
+         env
+         logger
+         monitor
+       ].each do |delegated|
+         define_method(delegated) do
+           Karafka.send(delegated)
+         end
+       end
+     end
+   end
+ end
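For orientation, this is roughly how the class above is exercised from a boot file. A minimal sketch, assuming a local broker; the KarafkaApp class name and addresses are illustrative and not part of this diff (the gem ships a fuller template in data/lib/karafka/templates/karafka.rb.example):

# Hypothetical boot file (karafka.rb); names and addresses are examples only
require 'karafka'

class KarafkaApp < Karafka::App
  setup do |config|
    config.client_id = 'example_app'
    config.kafka.seed_brokers = %w[kafka://127.0.0.1:9092]
  end
end

# Validates the config, sets up components and fires the after_init callbacks
KarafkaApp.boot!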
data/lib/karafka/attributes_map.rb
@@ -0,0 +1,69 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Both Karafka and Ruby-Kafka contain a lot of settings that can be applied on multiple
+   # levels. In Karafka that is on the consumer group and on the topic level. In Ruby-Kafka it
+   # is on the consumer, subscription and consumption levels. In order to keep those settings
+   # organized, this module was created. It contains details on where each setting should go
+   # and which layer (both in Karafka and Ruby-Kafka) is responsible for setting it
+   # and sending it forward
+   # @note Settings presented here cover all the settings that are being used across Karafka
+   module AttributesMap
+     class << self
+       # What settings should go where in ruby-kafka
+       # @note All other settings will be passed to the Kafka.new method invocation.
+       #   All elements in this hash are just edge cases
+       # @return [Hash] hash with proper sections on what to proxy where in Ruby-Kafka
+       def api_adapter
+         {
+           consumer: %i[
+             session_timeout offset_commit_interval offset_commit_threshold
+             offset_retention_time heartbeat_interval fetcher_max_queue_size
+           ],
+           subscribe: %i[start_from_beginning max_bytes_per_partition],
+           consumption: %i[min_bytes max_bytes max_wait_time],
+           pause: %i[pause_timeout],
+           # All the options that are under the kafka config namespace but are not used
+           # directly with the kafka api; from the Karafka user perspective, they are
+           # still related to kafka. They should not be proxied anywhere
+           ignored: %i[reconnect_timeout automatically_mark_as_consumed]
+         }
+       end
+
+       # @return [Array<Symbol>] properties that can be set on a per topic level
+       def topic
+         (api_adapter[:subscribe] + %i[
+           backend
+           name
+           parser
+           responder
+           batch_consuming
+           persistent
+         ]).uniq
+       end
+
+       # @return [Array<Symbol>] properties that can be set on a per consumer group level
+       # @note Note that there are settings directly extracted from the config kafka namespace.
+       #   It is done that way, so we don't have to repeat the same setting keys over and over.
+       #   Thanks to this solution, if any new setting is available for ruby-kafka, we just need
+       #   to add it to our configuration class and it will be handled automatically.
+       def consumer_group
+         # @note We don't ignore the api_adapter[:ignored] values as they should be ignored
+         #   only when proxying details to ruby-kafka. We use ignored fields internally in karafka
+         ignored_settings = api_adapter[:subscribe]
+         defined_settings = api_adapter.values.flatten
+         karafka_settings = %i[batch_fetching]
+         # This is a dirty and bad hack of dry-configurable to get keys before setting values
+         dynamically_proxied = Karafka::Setup::Config
+                               ._settings
+                               .settings
+                               .find { |s| s.name == :kafka }
+                               .value
+                               .names
+                               .to_a
+
+         (defined_settings + dynamically_proxied).uniq + karafka_settings - ignored_settings
+       end
+     end
+   end
+ end
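Reading the two public methods side by side, the per-topic attribute list is fully determined by the code shown: api_adapter[:subscribe] plus the literal list in #topic.

# Derived directly from the module above (no extra assumptions)
Karafka::AttributesMap.topic
# => [:start_from_beginning, :max_bytes_per_partition, :backend, :name,
#     :parser, :responder, :batch_consuming, :persistent]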
data/lib/karafka/backends/inline.rb
@@ -0,0 +1,16 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Namespace for all different backends Karafka supports
+   module Backends
+     # Backend that just runs stuff asap without any scheduling
+     module Inline
+       private
+
+       # Executes consume code immediately (without enqueuing)
+       def process
+         Karafka.monitor.instrument('backends.inline.process', caller: self) { consume }
+       end
+     end
+   end
+   end
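The backend is picked per topic in the routing. A hypothetical routing sketch, assuming the KarafkaApp boot class from above and an OrdersConsumer that does not exist in this diff:

# Illustrative only; topic and consumer names are examples
class KarafkaApp < Karafka::App
  consumer_groups.draw do
    topic :orders do
      # :inline is the default backend, so this line is usually implicit
      backend :inline
      consumer OrdersConsumer
    end
  end
end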
data/lib/karafka/base_consumer.rb
@@ -0,0 +1,68 @@
+ # frozen_string_literal: true
+
+ # Karafka module namespace
+ module Karafka
+   # Base consumer from which all Karafka consumers should inherit
+   class BaseConsumer
+     extend ActiveSupport::DescendantsTracker
+     extend Forwardable
+
+     # Allows us to mark messages as consumed for non-automatic mode without having
+     # to use the consumer client directly. We do it that way because most people should not
+     # mess with the client instance directly (just in case)
+     def_delegator :client, :mark_as_consumed
+
+     private :mark_as_consumed
+
+     class << self
+       attr_reader :topic
+
+       # Assigns a topic to a consumer and builds up proper consumer functionalities
+       # so that it can cooperate with the topic settings
+       # @param topic [Karafka::Routing::Topic]
+       # @return [Karafka::Routing::Topic] assigned topic
+       def topic=(topic)
+         @topic = topic
+         Consumers::Includer.call(self)
+       end
+     end
+
+     # @return [Karafka::Routing::Topic] topic to which a given consumer is subscribed
+     def topic
+       self.class.topic
+     end
+
+     # Creates a lazy loaded params batch object
+     # @note Until first params usage, it won't parse data at all
+     # @param messages [Array<Kafka::FetchedMessage>, Array<Hash>] messages with raw
+     #   content (from Kafka) or messages inside a hash (from backend, etc)
+     # @return [Karafka::Params::ParamsBatch] lazy loaded params batch
+     def params_batch=(messages)
+       @params_batch = Karafka::Params::ParamsBatch.new(messages, topic.parser)
+     end
+
+     # Executes the default consumer flow.
+     def call
+       process
+     end
+
+     private
+
+     # We make it private as it should be accessible only from the inside of a consumer
+     attr_reader :params_batch
+
+     # @return [Karafka::Connection::Client] messages consuming client that can be used to
+     #   manually commit offsets or pause / stop the consumer based on the business logic
+     def client
+       Persistence::Client.read
+     end
+
+     # Method that will perform business logic on data received from Kafka (it will consume
+     # the data)
+     # @note This method needs to be implemented in a subclass. We stub it here as a failover
+     #   in case someone forgets about it or makes a typo
+     def consume
+       raise NotImplementedError, 'Implement this in a subclass'
+     end
+   end
+ end
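A minimal, hypothetical subclass showing the contract above: implement #consume and work with params_batch. The OrdersConsumer name and payload shape are examples only:

class OrdersConsumer < Karafka::BaseConsumer
  def consume
    # params_batch lazily parses each message on first access
    params_batch.each do |params|
      puts params['value']
    end
  end
end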
data/lib/karafka/base_responder.rb
@@ -0,0 +1,208 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Base responder from which all Karafka responders should inherit
+   # Similar to the Rails responders concept. It allows us to design a flow from one app to another
+   # by isolating what responses should be sent (and where) based on a given action
+   # It differs from Rails responders in the way it works: in a std http request we can have one
+   # response; here we can have an unlimited number of them
+   #
+   # It has a simple API for defining where we should respond (and whether it is required)
+   #
+   # @example Basic usage (each registered topic is required to be used by default)
+   #   class Responder < BaseResponder
+   #     topic :new_action
+   #
+   #     def respond(data)
+   #       respond_to :new_action, data
+   #     end
+   #   end
+   #
+   # @example Responding to a topic with extra options
+   #   class Responder < BaseResponder
+   #     topic :new_action
+   #
+   #     def respond(data)
+   #       respond_to :new_action, data, partition_key: 'thing'
+   #     end
+   #   end
+   #
+   # @example Marking topic as not required (we won't have to use it)
+   #   class Responder < BaseResponder
+   #     topic :required_topic
+   #     topic :new_action, required: false
+   #
+   #     def respond(data)
+   #       respond_to :required_topic, data
+   #     end
+   #   end
+   #
+   # @example Multiple times used topic
+   #   class Responder < BaseResponder
+   #     topic :required_topic, multiple_usage: true
+   #
+   #     def respond(data)
+   #       data.each do |subset|
+   #         respond_to :required_topic, subset
+   #       end
+   #     end
+   #   end
+   #
+   # @example Accept multiple arguments to a respond method
+   #   class Responder < BaseResponder
+   #     topic :users_actions
+   #     topic :articles_viewed
+   #
+   #     def respond(user, article)
+   #       respond_to :users_actions, user
+   #       respond_to :articles_viewed, article
+   #     end
+   #   end
+   class BaseResponder
+     # Definitions of all topics that we want to be able to use in this responder should go here
+     class_attribute :topics
+
+     # Schema that we can use to control and/or require some additional details upon options
+     # that are being passed to the producer. This can be in particular useful if we want to make
+     # sure that for example partition_key is always present.
+     class_attribute :options_schema
+
+     attr_reader :messages_buffer
+
+     class << self
+       # Registers a topic as one to which we will be able to respond
+       # @param topic_name [Symbol, String] name of topic to which we want to respond
+       # @param options [Hash] hash with optional configuration details
+       def topic(topic_name, options = {})
+         self.topics ||= {}
+         topic_obj = Responders::Topic.new(topic_name, options.merge(registered: true))
+         self.topics[topic_obj.name] = topic_obj
+       end
+
+       # A simple alias for easier standalone responder usage.
+       # Instead of building it with new.call it allows (in case of using the JSON parser)
+       # to just run it directly from the class level
+       # @param data Anything that we want to respond with
+       # @example Send user data with a responder (uses default Karafka::Parsers::Json parser)
+       #   UsersCreatedResponder.call(@created_user)
+       def call(*data)
+         # Just in case there were no topics defined for a responder, we initialize with
+         # an empty hash not to handle a nil case
+         self.topics ||= {}
+         new.call(*data)
+       end
+     end
+
+     # Creates a responder object
+     # @param parser_class [Class] parser class that we can use to generate appropriate string
+     #   or nothing if we want to default to Karafka::Parsers::Json
+     # @return [Karafka::BaseResponder] base responder descendant responder
+     def initialize(parser_class = Karafka::App.config.parser)
+       @parser_class = parser_class
+       @messages_buffer = {}
+     end
+
+     # Performs respond and validates that all the response requirements were met
+     # @param data Anything that we want to respond with
+     # @note We know that validators should be executed also before sending data to topics, however
+     #   the implementation gets way more complicated then; that's why we check after everything
+     #   was sent using the responder
+     # @example Send user data with a responder (uses default Karafka::Parsers::Json parser)
+     #   UsersCreatedResponder.new.call(@created_user)
+     # @example Send user data with a responder using a non-default parser
+     #   UsersCreatedResponder.new(MyParser).call(@created_user)
+     def call(*data)
+       respond(*data)
+       validate_usage!
+       validate_options!
+       deliver!
+     end
+
+     private
+
+     # Checks if we met all the topics requirements. It will fail if we didn't send a message to
+     # a registered required topic, etc.
+     def validate_usage!
+       registered_topics = self.class.topics.map do |name, topic|
+         topic.to_h.merge!(
+           usage_count: messages_buffer[name]&.count || 0
+         )
+       end
+
+       used_topics = messages_buffer.map do |name, usage|
+         topic = self.class.topics[name] || Responders::Topic.new(name, registered: false)
+         topic.to_h.merge!(usage_count: usage.count)
+       end
+
+       result = Karafka::Schemas::ResponderUsage.call(
+         registered_topics: registered_topics,
+         used_topics: used_topics
+       )
+
+       return if result.success?
+
+       raise Karafka::Errors::InvalidResponderUsage, result.errors
+     end
+
+     # Checks if we met all the options requirements before sending them to the producer.
+     def validate_options!
+       return true unless self.class.options_schema
+
+       messages_buffer.each_value do |messages_set|
+         messages_set.each do |message_data|
+           result = self.class.options_schema.call(message_data.last)
+           next if result.success?
+           raise Karafka::Errors::InvalidResponderMessageOptions, result.errors
+         end
+       end
+     end
+
+     # Takes all the messages from the buffer and delivers them one by one
+     # @note This method is executed after the validation, so we're sure that
+     #   what we send is legit and it will go to proper topics
+     def deliver!
+       messages_buffer.each_value do |data_elements|
+         data_elements.each do |data, options|
+           # We map this topic name, so it will match the namespaced/etc topic in Kafka
+           # @note By default will not change the topic (if the default mapper is used)
+           mapped_topic = Karafka::App.config.topic_mapper.outgoing(options[:topic])
+           external_options = options.merge(topic: mapped_topic)
+           producer(options).call(data, external_options)
+         end
+       end
+     end
+
+     # Method that needs to be implemented in a subclass. It should handle responding
+     # on registered topics
+     # @raise [NotImplementedError] This method needs to be implemented in a subclass
+     def respond(*_data)
+       raise NotImplementedError, 'Implement this in a subclass'
+     end
+
+     # This method allows us to respond to a single topic with given data. It can be used
+     # as many times as we need, especially when we have a 1:n flow
+     # @param topic [Symbol, String] topic to which we want to respond
+     # @param data [String, Object] string or object that we want to send
+     # @param options [Hash] options for waterdrop (e.g. partition_key)
+     # @note respond_to does not accept multiple data arguments.
+     def respond_to(topic, data, options = {})
+       # We normalize the format to string, as WaterDrop and Ruby-Kafka support only
+       # string topics
+       topic = topic.to_s
+
+       messages_buffer[topic] ||= []
+       messages_buffer[topic] << [
+         @parser_class.generate(data),
+         options.merge(topic: topic)
+       ]
+     end
+
+     # @param options [Hash] options for waterdrop
+     # @return [Class] WaterDrop producer (sync or async based on the settings)
+     def producer(options)
+       self.class.topics[
+         options[:topic]
+       ].async? ? WaterDrop::AsyncProducer : WaterDrop::SyncProducer
+     end
+   end
+ end
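The docblock already covers the topic DSL, so here is a hedged sketch of the one feature it does not illustrate: options_schema. OrdersResponder and the schema contents are hypothetical; validate_options! only requires an object whose #call returns a result responding to #success? and #errors, which a dry-validation schema of this era satisfies:

class OrdersResponder < Karafka::BaseResponder
  topic :orders_completed

  # Reject any respond_to call that does not carry a partition_key
  self.options_schema = Dry::Validation.Schema do
    required(:partition_key).filled(:str?)
  end

  def respond(order)
    respond_to :orders_completed, order, partition_key: order['id'].to_s
  end
end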
data/lib/karafka/callbacks.rb
@@ -0,0 +1,30 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Additional callbacks that are used to trigger some things in given places during the
+   # system lifecycle
+   # @note These callbacks aren't the same as consumer callbacks, as they are not related to
+   #   the lifecycle of particular message fetches but rather to the internal flow process.
+   #   They cannot be defined on a consumer callback level because for some of those,
+   #   there aren't consumers in memory yet and/or they aren't per consumer thread
+   module Callbacks
+     # Types of system callbacks that we have that are not related to consumers
+     TYPES = %i[
+       after_init
+       before_fetch_loop
+     ].freeze
+
+     class << self
+       TYPES.each do |callback_type|
+         # Executes the given callback set at a given moment with the provided arguments
+         define_method callback_type do |*args|
+           Karafka::App
+             .config
+             .callbacks
+             .send(callback_type)
+             .each { |callback| callback.call(*args) }
+         end
+       end
+     end
+   end
+ end
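These triggers pair with the app-level registration DSL (Callbacks::Dsl, which App extends earlier in this diff). A hypothetical registration sketch; the block bodies are examples only:

class KarafkaApp < Karafka::App
  after_init do |config|
    # runs once, right after Setup::Config.setup_components in App.boot!
  end

  before_fetch_loop do |consumer_group, client|
    # runs in each listener thread before it starts polling Kafka
  end
end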
data/lib/karafka/callbacks/config.rb
@@ -0,0 +1,22 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Callbacks
+     # Additional configuration required to store procs that we will execute upon callback trigger
+     module Config
+       # Builds up internal callback accumulators
+       # @param klass [Class] class that we extend with callback config
+       def self.extended(klass)
+         # option callbacks [Hash] - internal karafka callback accumulators that should
+         #   never be changed by users directly
+         klass.setting :callbacks do
+           Callbacks::TYPES.each do |callback_type|
+             # option [Array<Proc>] an array of blocks that will be executed at a given moment
+             #   depending on the callback type
+             setting callback_type, []
+           end
+         end
+       end
+     end
+   end
+ end
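For the full picture, a sketch of the counterpart DSL (shipped in data/lib/karafka/callbacks/dsl.rb, listed in this release but not excerpted here): each registered block is pushed into the matching accumulator defined above, where Callbacks.after_init and friends later find it.

module Karafka
  module Callbacks
    # App-level DSL that appends user blocks to the config accumulators
    module Dsl
      Callbacks::TYPES.each do |callback_type|
        define_method(callback_type) do |&block|
          config.callbacks.send(callback_type).push(block)
        end
      end
    end
  end
end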