karafka 1.2.0

Files changed (86)
  1. checksums.yaml +7 -0
  2. data/.console_irbrc +13 -0
  3. data/.gitignore +68 -0
  4. data/.rspec +1 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +18 -0
  8. data/CHANGELOG.md +415 -0
  9. data/CODE_OF_CONDUCT.md +46 -0
  10. data/CONTRIBUTING.md +41 -0
  11. data/Gemfile +11 -0
  12. data/Gemfile.lock +123 -0
  13. data/MIT-LICENCE +18 -0
  14. data/README.md +89 -0
  15. data/bin/karafka +19 -0
  16. data/config/errors.yml +6 -0
  17. data/karafka.gemspec +37 -0
  18. data/lib/karafka.rb +78 -0
  19. data/lib/karafka/app.rb +45 -0
  20. data/lib/karafka/attributes_map.rb +67 -0
  21. data/lib/karafka/backends/inline.rb +16 -0
  22. data/lib/karafka/base_consumer.rb +68 -0
  23. data/lib/karafka/base_responder.rb +204 -0
  24. data/lib/karafka/callbacks.rb +30 -0
  25. data/lib/karafka/callbacks/config.rb +22 -0
  26. data/lib/karafka/callbacks/dsl.rb +16 -0
  27. data/lib/karafka/cli.rb +54 -0
  28. data/lib/karafka/cli/base.rb +78 -0
  29. data/lib/karafka/cli/console.rb +29 -0
  30. data/lib/karafka/cli/flow.rb +46 -0
  31. data/lib/karafka/cli/info.rb +29 -0
  32. data/lib/karafka/cli/install.rb +42 -0
  33. data/lib/karafka/cli/server.rb +66 -0
  34. data/lib/karafka/connection/client.rb +117 -0
  35. data/lib/karafka/connection/config_adapter.rb +120 -0
  36. data/lib/karafka/connection/delegator.rb +46 -0
  37. data/lib/karafka/connection/listener.rb +60 -0
  38. data/lib/karafka/consumers/callbacks.rb +54 -0
  39. data/lib/karafka/consumers/includer.rb +51 -0
  40. data/lib/karafka/consumers/responders.rb +24 -0
  41. data/lib/karafka/consumers/single_params.rb +15 -0
  42. data/lib/karafka/errors.rb +50 -0
  43. data/lib/karafka/fetcher.rb +44 -0
  44. data/lib/karafka/helpers/class_matcher.rb +78 -0
  45. data/lib/karafka/helpers/config_retriever.rb +46 -0
  46. data/lib/karafka/helpers/multi_delegator.rb +33 -0
  47. data/lib/karafka/instrumentation/listener.rb +112 -0
  48. data/lib/karafka/instrumentation/logger.rb +55 -0
  49. data/lib/karafka/instrumentation/monitor.rb +64 -0
  50. data/lib/karafka/loader.rb +28 -0
  51. data/lib/karafka/params/dsl.rb +156 -0
  52. data/lib/karafka/params/params_batch.rb +46 -0
  53. data/lib/karafka/parsers/json.rb +38 -0
  54. data/lib/karafka/patches/dry_configurable.rb +35 -0
  55. data/lib/karafka/patches/ruby_kafka.rb +34 -0
  56. data/lib/karafka/persistence/client.rb +25 -0
  57. data/lib/karafka/persistence/consumer.rb +38 -0
  58. data/lib/karafka/persistence/topic.rb +29 -0
  59. data/lib/karafka/process.rb +64 -0
  60. data/lib/karafka/responders/builder.rb +36 -0
  61. data/lib/karafka/responders/topic.rb +57 -0
  62. data/lib/karafka/routing/builder.rb +61 -0
  63. data/lib/karafka/routing/consumer_group.rb +61 -0
  64. data/lib/karafka/routing/consumer_mapper.rb +34 -0
  65. data/lib/karafka/routing/proxy.rb +37 -0
  66. data/lib/karafka/routing/router.rb +29 -0
  67. data/lib/karafka/routing/topic.rb +60 -0
  68. data/lib/karafka/routing/topic_mapper.rb +55 -0
  69. data/lib/karafka/schemas/config.rb +24 -0
  70. data/lib/karafka/schemas/consumer_group.rb +77 -0
  71. data/lib/karafka/schemas/consumer_group_topic.rb +18 -0
  72. data/lib/karafka/schemas/responder_usage.rb +39 -0
  73. data/lib/karafka/schemas/server_cli_options.rb +43 -0
  74. data/lib/karafka/server.rb +94 -0
  75. data/lib/karafka/setup/config.rb +189 -0
  76. data/lib/karafka/setup/configurators/base.rb +29 -0
  77. data/lib/karafka/setup/configurators/params.rb +25 -0
  78. data/lib/karafka/setup/configurators/water_drop.rb +32 -0
  79. data/lib/karafka/setup/dsl.rb +22 -0
  80. data/lib/karafka/status.rb +25 -0
  81. data/lib/karafka/templates/application_consumer.rb.example +6 -0
  82. data/lib/karafka/templates/application_responder.rb.example +11 -0
  83. data/lib/karafka/templates/karafka.rb.example +54 -0
  84. data/lib/karafka/version.rb +7 -0
  85. data/log/.gitkeep +0 -0
  86. metadata +301 -0
data/lib/karafka/app.rb
@@ -0,0 +1,45 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # App class
+   class App
+     extend Setup::Dsl
+     extend Callbacks::Dsl
+
+     class << self
+       # Sets up all the internal components and bootstraps the whole app
+       # We need to know the details about consumers in order to set up the components,
+       # that's why we don't set them up until the standard setup is done
+       # @raise [Karafka::Errors::InvalidConfiguration] raised when the configuration
+       #   doesn't match the ConfigurationSchema
+       def boot!
+         Setup::Config.validate!
+         Setup::Config.setup_components
+         Callbacks.after_init(Karafka::App.config)
+       end
+
+       # @return [Karafka::Routing::Builder] consumers builder instance
+       def consumer_groups
+         Routing::Builder.instance
+       end
+
+       Status.instance_methods(false).each do |delegated|
+         define_method(delegated) do
+           Status.instance.send(delegated)
+         end
+       end
+
+       # Methods that should be delegated to the Karafka module
+       %i[
+         root
+         env
+         logger
+         monitor
+       ].each do |delegated|
+         define_method(delegated) do
+           Karafka.send(delegated)
+         end
+       end
+     end
+   end
+ end
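The App class above is mostly a delegation shell. A minimal usage sketch follows; the status predicates such as run! and running? are assumptions here, since the delegation list is built dynamically from Status.instance_methods(false):

# Hypothetical boot flow: validate config, wire components, fire after_init callbacks
Karafka::App.boot!
# Status methods are delegated to the Status singleton
Karafka::App.run!
Karafka::App.running? #=> true
# Module-level helpers are delegated straight to Karafka
Karafka::App.logger.info('booted')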
data/lib/karafka/attributes_map.rb
@@ -0,0 +1,67 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Both Karafka and Ruby-Kafka contain a lot of settings that can be applied on multiple
+   # levels. In Karafka that is on the consumer group and on the topic level. In Ruby-Kafka it
+   # is on the consumer, subscription and consumption levels. In order to maintain order
+   # in managing those settings, this module was created. It contains details on which setting
+   # should go where and which layer (both in Karafka and Ruby-Kafka) is responsible for
+   # setting it and passing it forward
+   # @note Settings presented here cover all the settings that are being used across Karafka
+   module AttributesMap
+     class << self
+       # What settings should go where in ruby-kafka
+       # @note All other settings will be passed to the Kafka.new method invocation.
+       #   All elements in this hash are just edge cases
+       # @return [Hash] hash with proper sections on what to proxy where in Ruby-Kafka
+       def config_adapter
+         {
+           consumer: %i[
+             session_timeout offset_commit_interval offset_commit_threshold
+             offset_retention_time heartbeat_interval
+           ],
+           subscription: %i[start_from_beginning max_bytes_per_partition],
+           consuming: %i[min_bytes max_bytes max_wait_time],
+           pausing: %i[pause_timeout],
+           # All the options that are under the kafka config namespace but are not used
+           # directly with the kafka api. From the Karafka user perspective they are
+           # still related to kafka. They should not be proxied anywhere
+           ignored: %i[reconnect_timeout automatically_mark_as_consumed]
+         }
+       end
+
+       # @return [Array<Symbol>] properties that can be set on a per topic level
+       def topic
+         (config_adapter[:subscription] + %i[
+           backend
+           name
+           parser
+           responder
+           batch_consuming
+           persistent
+         ]).uniq
+       end
+
+       # @return [Array<Symbol>] properties that can be set on a per consumer group level
+       # @note Note that there are settings directly extracted from the config kafka namespace.
+       #   I did it that way so I won't have to repeat the same setting keys over and over again.
+       #   Thanks to this solution, if any new setting is available for ruby-kafka, we just need
+       #   to add it to our configuration class and it will be handled automatically.
+       def consumer_group
+         # @note We don't ignore the config_adapter[:ignored] values as they should be ignored
+         #   only when proxying details to ruby-kafka. We use the ignored fields internally in karafka
+         ignored_settings = config_adapter[:subscription]
+         defined_settings = config_adapter.values.flatten
+         karafka_settings = %i[batch_fetching]
+         # This is a dirty hack on dry-configurable to get the keys before setting the values
+         dynamically_proxied = Karafka::Setup::Config
+                               ._settings
+                               .find { |s| s.name == :kafka }
+                               .value
+                               .instance_variable_get('@klass').settings
+
+         (defined_settings + dynamically_proxied).uniq + karafka_settings - ignored_settings
+       end
+     end
+   end
+ end
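To make the split concrete, here is how the sections above resolve; the return values follow directly from the hash and arrays defined in this file:

Karafka::AttributesMap.config_adapter[:subscription]
#=> [:start_from_beginning, :max_bytes_per_partition]

# Topic-level attributes are the subscription settings plus Karafka's own extras
Karafka::AttributesMap.topic.include?(:start_from_beginning) #=> true
Karafka::AttributesMap.topic.include?(:parser)               #=> true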
data/lib/karafka/backends/inline.rb
@@ -0,0 +1,16 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Namespace for all different backends Karafka supports
+   module Backends
+     # Backend that just runs stuff asap without any scheduling
+     module Inline
+       private
+
+       # Executes consume code immediately (without enqueuing)
+       def process
+         Karafka.monitor.instrument('backends.inline.process', caller: self) { consume }
+       end
+     end
+   end
+ end
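For context, a sketch of how this backend is selected per topic, assuming the 1.2 routing DSL and a hypothetical EventsConsumer class:

Karafka::App.consumer_groups.draw do
  topic :events do
    consumer EventsConsumer
    # :inline mixes Backends::Inline into the consumer, so #process calls
    # #consume right away in the fetching thread instead of enqueuing a job
    backend :inline
  end
end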
data/lib/karafka/base_consumer.rb
@@ -0,0 +1,68 @@
+ # frozen_string_literal: true
+
+ # Karafka module namespace
+ module Karafka
+   # Base consumer from which all Karafka consumers should inherit
+   class BaseConsumer
+     extend ActiveSupport::DescendantsTracker
+     extend Forwardable
+
+     # Allows us to mark messages as consumed for the non-automatic mode without having
+     # to use the consumer client directly. We do it that way because most people should not
+     # mess with the client instance directly (just in case)
+     def_delegator :client, :mark_as_consumed
+
+     private :mark_as_consumed
+
+     class << self
+       attr_reader :topic
+
+       # Assigns a topic to a consumer and builds up proper consumer functionalities
+       # so that it can cooperate with the topic settings
+       # @param topic [Karafka::Routing::Topic]
+       # @return [Karafka::Routing::Topic] assigned topic
+       def topic=(topic)
+         @topic = topic
+         Consumers::Includer.call(self)
+       end
+     end
+
+     # @return [Karafka::Routing::Topic] topic to which a given consumer is subscribed
+     def topic
+       self.class.topic
+     end
+
+     # Creates a lazy loaded params batch object
+     # @note Until the first params usage, it won't parse data at all
+     # @param messages [Array<Kafka::FetchedMessage>, Array<Hash>] messages with raw
+     #   content (from Kafka) or messages inside a hash (from a backend, etc.)
+     # @return [Karafka::Params::ParamsBatch] lazy loaded params batch
+     def params_batch=(messages)
+       @params_batch = Karafka::Params::ParamsBatch.new(messages, topic.parser)
+     end
+
+     # Executes the default consumer flow.
+     def call
+       process
+     end
+
+     private
+
+     # We make it private as it should be accessible only from the inside of a consumer
+     attr_reader :params_batch
+
+     # @return [Karafka::Connection::Client] messages consuming client that can be used to
+     #   manually commit offsets or pause / stop the consumer based on the business logic
+     def client
+       Persistence::Client.read
+     end
+
+     # Method that will perform the business logic on data received from Kafka (it will
+     # consume the data)
+     # @note This method needs to be implemented in a subclass. We stub it here as a failover
+     #   in case someone forgets about it or makes a typo
+     def consume
+       raise NotImplementedError, 'Implement this in a subclass'
+     end
+   end
+ end
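A minimal consumer sketch built on the base class above; EventsConsumer and EventStore are hypothetical names:

class EventsConsumer < Karafka::BaseConsumer
  def consume
    # params_batch is lazy: messages are parsed only when accessed
    params_batch.each do |params|
      EventStore.persist(params['value']) # hypothetical persistence call
    end
    # With automatically_mark_as_consumed disabled, the private delegator
    # mark_as_consumed could be used here to commit the offset manually
  end
end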
data/lib/karafka/base_responder.rb
@@ -0,0 +1,204 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Base responder from which all Karafka responders should inherit
+   # Similar to the Rails responders concept. It allows us to design a flow from one app to another
+   # by isolating what responses should be sent (and where) based on a given action
+   # It differs from Rails responders in the way it works: in a standard http request we can have one
+   # response; here we can have an unlimited number of them
+   #
+   # It has a simple API for defining where we should respond (and whether it is required)
+   #
+   # @example Basic usage (each registered topic is required to be used by default)
+   #   class Responder < BaseResponder
+   #     topic :new_action
+   #
+   #     def respond(data)
+   #       respond_to :new_action, data
+   #     end
+   #   end
+   #
+   # @example Responding to a topic with extra options
+   #   class Responder < BaseResponder
+   #     topic :new_action
+   #
+   #     def respond(data)
+   #       respond_to :new_action, data, partition_key: 'thing'
+   #     end
+   #   end
+   #
+   # @example Marking a topic as not required (we won't have to use it)
+   #   class Responder < BaseResponder
+   #     topic :required_topic
+   #     topic :new_action, required: false
+   #
+   #     def respond(data)
+   #       respond_to :required_topic, data
+   #     end
+   #   end
+   #
+   # @example Using one topic multiple times
+   #   class Responder < BaseResponder
+   #     topic :required_topic, multiple_usage: true
+   #
+   #     def respond(data)
+   #       data.each do |subset|
+   #         respond_to :required_topic, subset
+   #       end
+   #     end
+   #   end
+   #
+   # @example Accepting multiple arguments in a respond method
+   #   class Responder < BaseResponder
+   #     topic :users_actions
+   #     topic :articles_viewed
+   #
+   #     def respond(user, article)
+   #       respond_to :users_actions, user
+   #       respond_to :articles_viewed, article
+   #     end
+   #   end
+   class BaseResponder
+     # Definitions of all the topics that we want to be able to use in this responder should go here
+     class_attribute :topics
+
+     # Schema that we can use to control and/or require some additional details upon options
+     # that are being passed to the producer. This can be particularly useful if we want to make
+     # sure that for example partition_key is always present.
+     class_attribute :options_schema
+
+     attr_reader :messages_buffer
+
+     class << self
+       # Registers a topic as one to which we will be able to respond
+       # @param topic_name [Symbol, String] name of the topic to which we want to respond
+       # @param options [Hash] hash with optional configuration details
+       def topic(topic_name, options = {})
+         self.topics ||= {}
+         topic_obj = Responders::Topic.new(topic_name, options.merge(registered: true))
+         self.topics[topic_obj.name] = topic_obj
+       end
+
+       # A simple alias for easier standalone responder usage.
+       # Instead of building it with new.call it allows (in case of using the JSON parser)
+       # to just run it directly from the class level
+       # @param data Anything that we want to respond with
+       # @example Send user data with a responder (uses the default Karafka::Parsers::Json parser)
+       #   UsersCreatedResponder.call(@created_user)
+       def call(*data)
+         # Just in case there were no topics defined for a responder, we initialize with an
+         # empty hash so we don't have to handle a nil case
+         self.topics ||= {}
+         new.call(*data)
+       end
+     end
+
+     # Creates a responder object
+     # @param parser_class [Class] parser class that we can use to generate an appropriate string,
+     #   or nothing if we want to default to Karafka::Parsers::Json
+     # @return [Karafka::BaseResponder] base responder descendant responder
+     def initialize(parser_class = Karafka::App.config.parser)
+       @parser_class = parser_class
+       @messages_buffer = {}
+     end
+
+     # Performs the respond and validates that all the response requirements were met
+     # @param data Anything that we want to respond with
+     # @note We know that validators should also be executed before sending data to topics, however
+     #   the implementation gets way more complicated then; that's why we check after everything
+     #   was sent using the responder
+     # @example Send user data with a responder (uses the default Karafka::Parsers::Json parser)
+     #   UsersCreatedResponder.new.call(@created_user)
+     # @example Send user data with a responder using a non default parser
+     #   UsersCreatedResponder.new(MyParser).call(@created_user)
+     def call(*data)
+       respond(*data)
+       validate_usage!
+       validate_options!
+       deliver!
+     end
+
+     private
+
+     # Checks if we met all the topics requirements. It will fail if we didn't send a message to
+     # a registered required topic, etc.
+     def validate_usage!
+       registered_topics = self.class.topics.map do |name, topic|
+         topic.to_h.merge!(
+           usage_count: messages_buffer[name]&.count || 0
+         )
+       end
+
+       used_topics = messages_buffer.map do |name, usage|
+         topic = self.class.topics[name] || Responders::Topic.new(name, registered: false)
+         topic.to_h.merge!(usage_count: usage.count)
+       end
+
+       result = Karafka::Schemas::ResponderUsage.call(
+         registered_topics: registered_topics,
+         used_topics: used_topics
+       )
+
+       return if result.success?
+
+       raise Karafka::Errors::InvalidResponderUsage, result.errors
+     end
+
+     # Checks if we met all the options requirements before sending them to the producer.
+     def validate_options!
+       return true unless self.class.options_schema
+
+       messages_buffer.each_value do |messages_set|
+         messages_set.each do |message_data|
+           result = self.class.options_schema.call(message_data.last)
+           next if result.success?
+           raise Karafka::Errors::InvalidResponderMessageOptions, result.errors
+         end
+       end
+     end
+
+     # Takes all the messages from the buffer and delivers them one by one
+     # @note This method is executed after the validation, so we're sure that
+     #   what we send is legit and it will go to the proper topics
+     def deliver!
+       messages_buffer.each_value do |data_elements|
+         data_elements.each do |data, options|
+           producer(options).call(data, options)
+         end
+       end
+     end
+
+     # Method that needs to be implemented in a subclass. It should handle responding
+     # on the registered topics
+     # @raise [NotImplementedError] This method needs to be implemented in a subclass
+     def respond(*_data)
+       raise NotImplementedError, 'Implement this in a subclass'
+     end
+
+     # This method allows us to respond to a single topic with given data. It can be used
+     # as many times as we need, especially when we have a 1:n flow
+     # @param topic [Symbol, String] topic to which we want to respond
+     # @param data [String, Object] string or object that we want to send
+     # @param options [Hash] options for waterdrop (e.g. partition_key)
+     # @note respond_to does not accept multiple data arguments.
+     def respond_to(topic, data, options = {})
+       # We normalize the format to a string, as WaterDrop and Ruby-Kafka support only
+       # string topics
+       topic = topic.to_s
+
+       messages_buffer[topic] ||= []
+       messages_buffer[topic] << [
+         @parser_class.generate(data),
+         # We map this topic name, so it will match the namespaced/etc topic in Kafka
+         # @note By default it will not change the topic (if the default mapper is used)
+         options.merge(topic: Karafka::App.config.topic_mapper.outgoing(topic))
+       ]
+     end
+
+     # @param options [Hash] options for waterdrop
+     # @return [Class] WaterDrop producer (sync or async, based on the settings)
+     def producer(options)
+       options[:async] ? WaterDrop::AsyncProducer : WaterDrop::SyncProducer
+     end
+   end
+ end
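A sketch tying the pieces together, including options_schema; it assumes the Dry::Validation.Schema builder that Karafka's own schemas use at this point, and all class names are illustrative:

class UsersCreatedResponder < Karafka::BaseResponder
  # Reject deliveries that don't carry a partition_key (assumption: dry-validation 0.11 API)
  self.options_schema = Dry::Validation.Schema do
    required(:partition_key).filled(:str?)
  end

  topic :users_created

  def respond(user)
    respond_to :users_created, user, partition_key: user[:id].to_s
  end
end

# Class-level alias: builds the responder and runs call
UsersCreatedResponder.call(id: 42, name: 'Jane')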
data/lib/karafka/callbacks.rb
@@ -0,0 +1,30 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Additional callbacks that are used to trigger some things in given places during the
+   # system lifecycle
+   # @note Those callbacks aren't the same as consumer callbacks, as they are not related to the
+   #   lifecycle of particular message fetches but rather to the internal flow process.
+   #   They cannot be defined on the consumer callback level because, for some of them,
+   #   there are no consumers in memory yet and/or they aren't per consumer thread
+   module Callbacks
+     # Types of system callbacks that we have that are not related to consumers
+     TYPES = %i[
+       after_init
+       before_fetch_loop
+     ].freeze
+
+     class << self
+       TYPES.each do |callback_type|
+         # Executes a given set of callbacks at a given moment with the provided arguments
+         define_method callback_type do |*args|
+           Karafka::App
+             .config
+             .callbacks
+             .send(callback_type)
+             .each { |callback| callback.call(*args) }
+         end
+       end
+     end
+   end
+ end
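These hooks are registered through Callbacks::Dsl (listed in the files above) and fired here; a sketch of the expected wiring, assuming the app-class style from the karafka.rb template:

class KarafkaApp < Karafka::App
  setup do |config|
    config.client_id = 'example_app'
  end

  # The block lands in config.callbacks.after_init and is executed by
  # Callbacks.after_init(Karafka::App.config) at the end of boot!
  after_init do |config|
    config.logger.info('Karafka booted')
  end
end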
data/lib/karafka/callbacks/config.rb
@@ -0,0 +1,22 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Callbacks
+     # Additional configuration required to store the procs that we will execute upon a callback trigger
+     module Config
+       # Builds up the internal callback accumulators
+       # @param klass [Class] class that we extend with the callback config
+       def self.extended(klass)
+         # option callbacks [Hash] - optional - internal karafka callback settings that should
+         #   never be changed by users directly
+         klass.setting :callbacks do
+           Callbacks::TYPES.each do |callback_type|
+             # option [Array<Proc>] an array of blocks that will be executed at a given moment
+             #   depending on the callback type
+             setting callback_type, []
+           end
+         end
+       end
+     end
+   end
+ end
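The net effect of this extension is one proc accumulator per callback type; a sketch of what the config exposes afterwards:

# Each type from Callbacks::TYPES becomes an array-valued setting
Karafka::App.config.callbacks.after_init        #=> [] until blocks are registered
Karafka::App.config.callbacks.before_fetch_loop #=> []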