karafka 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. checksums.yaml +5 -5
  2. data/.ruby-version +1 -1
  3. data/.travis.yml +1 -0
  4. data/CHANGELOG.md +46 -2
  5. data/CONTRIBUTING.md +5 -6
  6. data/Gemfile +1 -2
  7. data/Gemfile.lock +41 -29
  8. data/README.md +13 -19
  9. data/karafka.gemspec +6 -4
  10. data/lib/karafka.rb +17 -7
  11. data/lib/karafka/app.rb +8 -15
  12. data/lib/karafka/attributes_map.rb +1 -1
  13. data/lib/karafka/backends/inline.rb +1 -2
  14. data/lib/karafka/{base_controller.rb → base_consumer.rb} +19 -11
  15. data/lib/karafka/base_responder.rb +34 -15
  16. data/lib/karafka/callbacks.rb +30 -0
  17. data/lib/karafka/callbacks/config.rb +22 -0
  18. data/lib/karafka/callbacks/dsl.rb +16 -0
  19. data/lib/karafka/cli/install.rb +2 -3
  20. data/lib/karafka/cli/server.rb +0 -1
  21. data/lib/karafka/connection/{consumer.rb → client.rb} +32 -36
  22. data/lib/karafka/connection/config_adapter.rb +14 -6
  23. data/lib/karafka/connection/delegator.rb +46 -0
  24. data/lib/karafka/connection/listener.rb +22 -13
  25. data/lib/karafka/{controllers → consumers}/callbacks.rb +9 -9
  26. data/lib/karafka/consumers/includer.rb +51 -0
  27. data/lib/karafka/consumers/responders.rb +24 -0
  28. data/lib/karafka/{controllers → consumers}/single_params.rb +3 -3
  29. data/lib/karafka/errors.rb +10 -3
  30. data/lib/karafka/fetcher.rb +30 -34
  31. data/lib/karafka/helpers/class_matcher.rb +8 -8
  32. data/lib/karafka/helpers/config_retriever.rb +2 -2
  33. data/lib/karafka/instrumentation/listener.rb +112 -0
  34. data/lib/karafka/instrumentation/logger.rb +55 -0
  35. data/lib/karafka/instrumentation/monitor.rb +64 -0
  36. data/lib/karafka/loader.rb +0 -1
  37. data/lib/karafka/params/{params.rb → dsl.rb} +71 -43
  38. data/lib/karafka/params/params_batch.rb +7 -2
  39. data/lib/karafka/patches/dry_configurable.rb +6 -2
  40. data/lib/karafka/patches/ruby_kafka.rb +10 -10
  41. data/lib/karafka/persistence/client.rb +25 -0
  42. data/lib/karafka/persistence/consumer.rb +27 -14
  43. data/lib/karafka/persistence/topic.rb +29 -0
  44. data/lib/karafka/process.rb +5 -4
  45. data/lib/karafka/responders/builder.rb +15 -14
  46. data/lib/karafka/routing/builder.rb +1 -1
  47. data/lib/karafka/routing/consumer_mapper.rb +3 -2
  48. data/lib/karafka/routing/router.rb +1 -1
  49. data/lib/karafka/routing/topic.rb +5 -11
  50. data/lib/karafka/schemas/config.rb +3 -0
  51. data/lib/karafka/schemas/consumer_group.rb +15 -3
  52. data/lib/karafka/schemas/consumer_group_topic.rb +1 -1
  53. data/lib/karafka/server.rb +37 -5
  54. data/lib/karafka/setup/config.rb +47 -21
  55. data/lib/karafka/setup/configurators/base.rb +6 -12
  56. data/lib/karafka/setup/configurators/params.rb +25 -0
  57. data/lib/karafka/setup/configurators/water_drop.rb +6 -3
  58. data/lib/karafka/setup/dsl.rb +22 -0
  59. data/lib/karafka/templates/{application_controller.rb.example → application_consumer.rb.example} +2 -3
  60. data/lib/karafka/templates/karafka.rb.example +17 -4
  61. data/lib/karafka/version.rb +1 -1
  62. metadata +58 -24
  63. data/.github/ISSUE_TEMPLATE.md +0 -2
  64. data/lib/karafka/connection/processor.rb +0 -61
  65. data/lib/karafka/controllers/includer.rb +0 -51
  66. data/lib/karafka/controllers/responders.rb +0 -19
  67. data/lib/karafka/logger.rb +0 -53
  68. data/lib/karafka/monitor.rb +0 -98
  69. data/lib/karafka/persistence/controller.rb +0 -38
@@ -21,7 +21,7 @@ module Karafka
          offset_retention_time heartbeat_interval
        ],
        subscription: %i[start_from_beginning max_bytes_per_partition],
-       consuming: %i[min_bytes max_wait_time],
+       consuming: %i[min_bytes max_bytes max_wait_time],
        pausing: %i[pause_timeout],
        # All the options that are under kafka config namespace, but are not used
        # directly with kafka api, but from the Karafka user perspective, they are
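This hunk (from lib/karafka/attributes_map.rb) whitelists max_bytes as a consuming option that gets forwarded to ruby-kafka. A minimal sketch of setting it through the kafka.* settings namespace; the value and app class name are illustrative only:

```ruby
# karafka.rb - sketch only, values are illustrative
class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka.seed_brokers = %w[kafka://127.0.0.1:9092]
    # Upper limit on the total bytes fetched per request, now passed through to ruby-kafka
    config.kafka.max_bytes = 1_048_576
  end
end
```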
@@ -9,8 +9,7 @@ module Karafka

      # Executes consume code immediately (without enqueuing)
      def process
-       Karafka.monitor.notice(self.class, params_batch)
-       consume
+       Karafka.monitor.instrument('backends.inline.process', caller: self) { consume }
      end
    end
  end
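The inline backend now reports processing through Karafka.monitor.instrument under the backends.inline.process event instead of the old notice call. A minimal sketch of subscribing to that event, assuming the default 1.2 instrumentation monitor; treating the :time payload key as an assumption:

```ruby
# Sketch: log how long inline consumption of each batch takes
Karafka.monitor.subscribe('backends.inline.process') do |event|
  Karafka.logger.info "#{event[:caller].class} consumed a batch in #{event[:time]} ms"
end
```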
@@ -2,24 +2,32 @@

  # Karafka module namespace
  module Karafka
-   # Base controller from which all Karafka controllers should inherit
-   class BaseController
+   # Base consumer from which all Karafka consumers should inherit
+   class BaseConsumer
      extend ActiveSupport::DescendantsTracker
+     extend Forwardable
+
+     # Allows us to mark messages as consumed for non-automatic mode without having
+     # to use consumer client directly. We do this that way, because most of the people should not
+     # mess with the client instance directly (just in case)
+     def_delegator :client, :mark_as_consumed
+
+     private :mark_as_consumed

      class << self
        attr_reader :topic

-       # Assigns a topic to a controller and build up proper controller functionalities, so it can
-       # cooperate with the topic settings
+       # Assigns a topic to a consumer and builds up proper consumer functionalities
+       # so that it can cooperate with the topic settings
        # @param topic [Karafka::Routing::Topic]
        # @return [Karafka::Routing::Topic] assigned topic
        def topic=(topic)
          @topic = topic
-         Controllers::Includer.call(self)
+         Consumers::Includer.call(self)
        end
      end

-     # @return [Karafka::Routing::Topic] topic to which a given controller is subscribed
+     # @return [Karafka::Routing::Topic] topic to which a given consumer is subscribed
      def topic
        self.class.topic
      end
@@ -33,20 +41,20 @@ module Karafka
        @params_batch = Karafka::Params::ParamsBatch.new(messages, topic.parser)
      end

-     # Executes the default controller flow.
+     # Executes the default consumer flow.
      def call
        process
      end

      private

-     # We make it private as it should be accesible only from the inside of a controller
+     # We make it private as it should be accessible only from the inside of a consumer
      attr_reader :params_batch

-     # @return [Karafka::Connection::Consumer] messages consumer that can be used to
+     # @return [Karafka::Connection::Client] messages consuming client that can be used to
      #   commit manually offset or pause / stop consumer based on the business logic
-     def consumer
-       Persistence::Consumer.read
+     def client
+       Persistence::Client.read
      end

      # Method that will perform business logic and on data received from Kafka (it will consume
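With the rename to BaseConsumer, manual offset management goes through the new private mark_as_consumed, which delegates to the underlying client. A minimal consumer sketch assuming automatic offset marking is disabled for the topic; EventsConsumer and EventStore are illustrative names:

```ruby
class EventsConsumer < ApplicationConsumer
  def consume
    # EventStore is a hypothetical application-side service
    params_batch.each { |params| EventStore.store(params) }
    # Mark only the last message once the whole batch has been handled
    mark_as_consumed params_batch.to_a.last
  end
end
```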
@@ -62,6 +62,11 @@ module Karafka
      # Definitions of all topics that we want to be able to use in this responder should go here
      class_attribute :topics

+     # Schema that we can use to control and/or require some additional details upon options
+     # that are being passed to the producer. This can be in particular useful if we want to make
+     # sure that for example partition_key is always present.
+     class_attribute :options_schema
+
      attr_reader :messages_buffer

      class << self
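The new options_schema class attribute lets a responder validate delivery options before they reach the producer. A minimal sketch assuming a dry-validation schema that requires partition_key; the class names are illustrative:

```ruby
class UsersCreatedResponder < ApplicationResponder
  topic :users_created

  # Fail fast when partition_key is missing from the delivery options
  self.options_schema = Dry::Validation.Schema do
    required(:partition_key).filled(:str?)
  end

  def respond(user)
    respond_to :users_created, user, partition_key: user.id.to_s
  end
end
```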
@@ -92,7 +97,7 @@ module Karafka
      # @param parser_class [Class] parser class that we can use to generate appropriate string
      #   or nothing if we want to default to Karafka::Parsers::Json
      # @return [Karafka::BaseResponder] base responder descendant responder
-     def initialize(parser_class = Karafka::Parsers::Json)
+     def initialize(parser_class = Karafka::App.config.parser)
        @parser_class = parser_class
        @messages_buffer = {}
      end
@@ -108,7 +113,8 @@ module Karafka
      #   UsersCreatedResponder.new(MyParser).call(@created_user)
      def call(*data)
        respond(*data)
-       validate!
+       validate_usage!
+       validate_options!
        deliver!
      end

@@ -116,7 +122,7 @@ module Karafka

      # Checks if we met all the topics requirements. It will fail if we didn't send a message to
      # a registered required topic, etc.
-     def validate!
+     def validate_usage!
        registered_topics = self.class.topics.map do |name, topic|
          topic.to_h.merge!(
            usage_count: messages_buffer[name]&.count || 0
@@ -138,20 +144,26 @@ module Karafka
        raise Karafka::Errors::InvalidResponderUsage, result.errors
      end

+     # Checks if we met all the options requirements before sending them to the producer.
+     def validate_options!
+       return true unless self.class.options_schema
+
+       messages_buffer.each_value do |messages_set|
+         messages_set.each do |message_data|
+           result = self.class.options_schema.call(message_data.last)
+           next if result.success?
+           raise Karafka::Errors::InvalidResponderMessageOptions, result.errors
+         end
+       end
+     end
+
      # Takes all the messages from the buffer and delivers them one by one
      # @note This method is executed after the validation, so we're sure that
      #   what we send is legit and it will go to a proper topics
      def deliver!
-       messages_buffer.each do |topic, data_elements|
-         # We map this topic name, so it will match namespaced/etc topic in Kafka
-         # @note By default will not change topic (if default mapper used)
-         mapped_topic = Karafka::App.config.topic_mapper.outgoing(topic)
-
+       messages_buffer.each_value do |data_elements|
          data_elements.each do |data, options|
-           producer(options).call(
-             data,
-             options.merge(topic: mapped_topic)
-           )
+           producer(options).call(data, options)
          end
        end
      end
@@ -170,10 +182,17 @@ module Karafka
      # @param options [Hash] options for waterdrop (e.g. partition_key)
      # @note Respond to does not accept multiple data arguments.
      def respond_to(topic, data, options = {})
-       Karafka.monitor.notice(self.class, topic: topic, data: data, options: options)
+       # We normalize the format to string, as WaterDrop and Ruby-Kafka support only
+       # string topics
+       topic = topic.to_s

-       messages_buffer[topic.to_s] ||= []
-       messages_buffer[topic.to_s] << [@parser_class.generate(data), options]
+       messages_buffer[topic] ||= []
+       messages_buffer[topic] << [
+         @parser_class.generate(data),
+         # We map this topic name, so it will match namespaced/etc topic in Kafka
+         # @note By default will not change topic (if default mapper used)
+         options.merge(topic: Karafka::App.config.topic_mapper.outgoing(topic))
+       ]
      end

      # @param options [Hash] options for waterdrop
@@ -0,0 +1,30 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Additional callbacks that are used to trigger some things in given places during the
+   # system lifecycle
+   # @note Those callbacks aren't the same as consumer callbacks as they are not related to the
+   #   lifecycle of particular messages fetches but rather to the internal flow process.
+   #   They cannot be defined on a consumer callback level because for some of those,
+   #   there aren't consumers in the memory yet and/or they aren't per consumer thread
+   module Callbacks
+     # Types of system callbacks that we have that are not related to consumers
+     TYPES = %i[
+       after_init
+       before_fetch_loop
+     ].freeze
+
+     class << self
+       TYPES.each do |callback_type|
+         # Executes given callbacks set at a given moment with provided arguments
+         define_method callback_type do |*args|
+           Karafka::App
+             .config
+             .callbacks
+             .send(callback_type)
+             .each { |callback| callback.call(*args) }
+         end
+       end
+     end
+   end
+ end
@@ -0,0 +1,22 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Callbacks
+     # Additional configuration required to store procs that we will execute upon callback trigger
+     module Config
+       # Builds up internal callback accumulators
+       # @param klass [Class] Class that we extend with callback config
+       def self.extended(klass)
+         # option internal [Hash] - optional - internal karafka configuration settings that should
+         #   never be changed by users directly
+         klass.setting :callbacks do
+           Callbacks::TYPES.each do |callback_type|
+             # option [Array<Proc>] an array of blocks that will be executed at a given moment
+             #   depending on the callback type
+             setting callback_type, []
+           end
+         end
+       end
+     end
+   end
+ end
@@ -0,0 +1,16 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Callbacks
+     # App level dsl to define callbacks
+     module Dsl
+       Callbacks::TYPES.each do |callback_type|
+         # Allows us to define a block, that will be executed for a given moment
+         # @param [Block] block that should be executed after the initialization process
+         define_method callback_type do |&block|
+           config.callbacks.send(callback_type).push block
+         end
+       end
+     end
+   end
+ end
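Together, these three new files add app-level lifecycle callbacks (after_init and before_fetch_loop) that are registered through the DSL above. A minimal karafka.rb sketch, assuming after_init receives the app config and before_fetch_loop receives the consumer group and the client:

```ruby
class KarafkaApp < Karafka::App
  setup do |config|
    # ...
  end

  # Runs once, after the framework configuration is finalized
  after_init do |_config|
    Karafka.logger.info 'Karafka booted'
  end

  # Runs in each listener thread, right before its fetch loop starts
  before_fetch_loop do |_consumer_group, client|
    # e.g. client.seek(...) to rewind to a custom offset before consuming
  end
end
```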
@@ -9,8 +9,7 @@ module Karafka

      # Directories created by default
      INSTALL_DIRS = %w[
-       app/models
-       app/controllers
+       app/consumers
        app/responders
        config
        log
@@ -20,7 +19,7 @@ module Karafka
      # Where should we map proper files from templates
      INSTALL_FILES_MAP = {
        'karafka.rb.example' => Karafka.boot_file.basename,
-       'application_controller.rb.example' => 'app/controllers/application_controller.rb',
+       'application_consumer.rb.example' => 'app/consumers/application_consumer.rb',
        'application_responder.rb.example' => 'app/responders/application_responder.rb'
      }.freeze

@@ -35,7 +35,6 @@ module Karafka
      #   won't alarm or start new system process up until the current one is finished
      ObjectSpace.define_finalizer(self, proc { send(:clean) })

-     # After we fork, we can boot celluloid again
      Karafka::Server.run
    end

@@ -2,37 +2,54 @@

  module Karafka
    module Connection
-     # Class used as a wrapper around Ruby-Kafka to simplify additional
+     # Class used as a wrapper around Ruby-Kafka client to simplify additional
      # features that we provide/might provide in future and to hide the internal implementation
-     class Consumer
-       # Creates a queue consumer that will pull the data from Kafka
+     class Client
+       extend Forwardable
+
+       def_delegator :kafka_consumer, :seek
+
+       # Creates a queue consumer client that will pull the data from Kafka
        # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group for which
        #   we create a client
-       # @return [Karafka::Connection::Consumer] group consumer that can subscribe to
+       # @return [Karafka::Connection::Client] group consumer that can subscribe to
        #   multiple topics
        def initialize(consumer_group)
          @consumer_group = consumer_group
-         Persistence::Consumer.write(self)
+         Persistence::Client.write(self)
        end

        # Opens connection, gets messages and calls a block for each of the incoming messages
        # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
        # @note This will yield with raw messages - no preprocessing or reformatting.
        def fetch_loop
-         send(
-           consumer_group.batch_fetching ? :consume_each_batch : :consume_each_message
-         ) { |messages| yield(messages) }
-       rescue Kafka::ProcessingError => e
+         settings = ConfigAdapter.consuming(consumer_group)
+
+         if consumer_group.batch_fetching
+           kafka_consumer.each_batch(*settings) { |batch| yield(batch.messages) }
+         else
+           # always yield an array of messages, so we have consistent API (always a batch)
+           kafka_consumer.each_message(*settings) { |message| yield([message]) }
+         end
+       rescue Kafka::ProcessingError => error
          # If there was an error during consumption, we have to log it, pause current partition
          # and process other things
-         Karafka.monitor.notice_error(self.class, e.cause)
-         pause(e.topic, e.partition)
+         Karafka.monitor.instrument(
+           'connection.client.fetch_loop.error',
+           caller: self,
+           error: error.cause
+         )
+         pause(error.topic, error.partition)
          retry
          # This is on purpose - see the notes for this method
          # rubocop:disable RescueException
-       rescue Exception => e
+       rescue Exception => error
          # rubocop:enable RescueException
-         Karafka.monitor.notice_error(self.class, e)
+         Karafka.monitor.instrument(
+           'connection.client.fetch_loop.error',
+           caller: self,
+           error: error
+         )
          retry
        end

@@ -70,32 +87,11 @@ module Karafka

        attr_reader :consumer_group

-       # Consumes messages from Kafka in batches
-       # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
-       def consume_each_batch
-         kafka_consumer.each_batch(
-           ConfigAdapter.consuming(consumer_group)
-         ) do |batch|
-           yield(batch.messages)
-         end
-       end
-
-       # Consumes messages from Kafka one by one
-       # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
-       def consume_each_message
-         kafka_consumer.each_message(
-           ConfigAdapter.consuming(consumer_group)
-         ) do |message|
-           # always yield an array of messages, so we have consistent API (always a batch)
-           yield([message])
-         end
-       end
-
        # @return [Kafka::Consumer] returns a ready to consume Kafka consumer
        #   that is set up to consume from topics of a given consumer group
        def kafka_consumer
          @kafka_consumer ||= kafka.consumer(
-           ConfigAdapter.consumer(consumer_group)
+           *ConfigAdapter.consumer(consumer_group)
          ).tap do |consumer|
            consumer_group.topics.each do |topic|
              consumer.subscribe(*ConfigAdapter.subscription(topic))
@@ -114,7 +110,7 @@ module Karafka
        # @note We don't cache it internally because we cache kafka_consumer that uses kafka
        #   object instance
        def kafka
-         Kafka.new(ConfigAdapter.client(consumer_group))
+         Kafka.new(*ConfigAdapter.client(consumer_group))
        end
      end
    end
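Errors raised inside the fetch loop are now published through the monitor under connection.client.fetch_loop.error rather than notice_error. A minimal sketch of forwarding them to an error tracker; ErrorTracker is a hypothetical application-side service and the payload key is an assumption based on the hunk above:

```ruby
Karafka.monitor.subscribe('connection.client.fetch_loop.error') do |event|
  # ErrorTracker is hypothetical; replace with your reporting service
  ErrorTracker.notify(event[:error])
end
```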
@@ -14,7 +14,10 @@ module Karafka
      class << self
        # Builds all the configuration settings for Kafka.new method
        # @param _consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
-       # @return [Hash] hash with all the settings required by Kafka.new method
+       # @return [Array<Hash>] Array with all the client arguments including hash with all
+       #   the settings required by Kafka.new method
+       # @note We return array, so we can inject any arguments we want, in case of changes in the
+       #   raw driver
        def client(_consumer_group)
          # This one is a default that takes all the settings except special
          # cases defined in the map
@@ -33,28 +36,33 @@ module Karafka
            settings[setting_name] = setting_value
          end

-         sanitize(settings)
+         settings_hash = sanitize(settings)
+
+         # Normalization for the way Kafka::Client accepts arguments from 0.5.3
+         [settings_hash.delete(:seed_brokers), settings_hash]
        end

        # Builds all the configuration settings for kafka#consumer method
        # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
-       # @return [Hash] hash with all the settings required by Kafka#consumer method
+       # @return [Array<Hash>] array with all the consumer arguments including hash with all
+       #   the settings required by Kafka#consumer
        def consumer(consumer_group)
          settings = { group_id: consumer_group.id }
          settings = fetch_for(:consumer, consumer_group, settings)
-         sanitize(settings)
+         [sanitize(settings)]
        end

        # Builds all the configuration settings for kafka consumer consume_each_batch and
        # consume_each_message methods
        # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
-       # @return [Hash] hash with all the settings required by
+       # @return [Array<Hash>] Array with all the arguments required by consuming method
+       #   including hash with all the settings required by
        #   Kafka::Consumer#consume_each_message and Kafka::Consumer#consume_each_batch method
        def consuming(consumer_group)
          settings = {
            automatically_mark_as_processed: consumer_group.automatically_mark_as_consumed
          }
-         sanitize(fetch_for(:consuming, consumer_group, settings))
+         [sanitize(fetch_for(:consuming, consumer_group, settings))]
        end

        # Builds all the configuration settings for kafka consumer#subscribe method
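The adapter now returns arrays so each result can be splatted straight into the corresponding ruby-kafka call; for client, the seed brokers are pulled out of the settings hash and become the first positional argument, matching Kafka.new from ruby-kafka 0.5.3 onwards. A sketch of what the splat expands to (illustrative only):

```ruby
# Kafka.new(*ConfigAdapter.client(consumer_group)) is equivalent to:
seed_brokers, settings = ConfigAdapter.client(consumer_group)
kafka = Kafka.new(seed_brokers, settings)
```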
@@ -0,0 +1,46 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Connection
+     # Class that delegates processing of messages for which we listen to a proper processor
+     module Delegator
+       class << self
+         # Delegates messages (does something with them)
+         # It will either schedule or run a proper processor action for messages
+         # @note This should be looped to obtain a constant delegating of new messages
+         # @note We catch all the errors here, to make sure that none failures
+         #   for a given consumption will affect other consumed messages
+         #   If we wouldn't catch it, it would propagate up until killing the thread
+         # @note It is a one huge method, because of performance reasons. It is much faster then
+         #   using send or invoking additional methods
+         # @param group_id [String] group_id of a group from which a given message came
+         # @param kafka_messages [Array<Kafka::FetchedMessage>] raw messages fetched from kafka
+         def call(group_id, kafka_messages)
+           # @note We always get messages by topic and partition so we can take topic from the
+           #   first one and it will be valid for all the messages
+           topic = Persistence::Topic.fetch(group_id, kafka_messages[0].topic)
+           consumer = Persistence::Consumer.fetch(topic, kafka_messages[0].partition)
+
+           Karafka.monitor.instrument(
+             'connection.delegator.call',
+             caller: self,
+             consumer: consumer,
+             kafka_messages: kafka_messages
+           ) do
+             # Depending on a case (persisted or not) we might use new consumer instance per
+             # each batch, or use the same one for all of them (for implementing buffering, etc.)
+             if topic.batch_consuming
+               consumer.params_batch = kafka_messages
+               consumer.call
+             else
+               kafka_messages.each do |kafka_message|
+                 consumer.params_batch = [kafka_message]
+                 consumer.call
+               end
+             end
+           end
+         end
+       end
+     end
+   end
+ end
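The delegator decides between batch and per-message consumption based on the topic's batch_consuming flag. A minimal routing sketch that enables it; the group, topic and consumer names are illustrative:

```ruby
class KarafkaApp < Karafka::App
  consumer_groups.draw do
    consumer_group :events do
      batch_fetching true

      topic :user_events do
        consumer UserEventsConsumer
        # Hand the whole fetched batch to a single #consume call
        batch_consuming true
      end
    end
  end
end
```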