karafka 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +5 -5
  2. data/.ruby-version +1 -1
  3. data/.travis.yml +1 -0
  4. data/CHANGELOG.md +40 -0
  5. data/Gemfile +1 -2
  6. data/Gemfile.lock +41 -29
  7. data/README.md +7 -4
  8. data/karafka.gemspec +6 -4
  9. data/lib/karafka.rb +17 -7
  10. data/lib/karafka/app.rb +8 -15
  11. data/lib/karafka/attributes_map.rb +1 -1
  12. data/lib/karafka/backends/inline.rb +1 -2
  13. data/lib/karafka/{base_controller.rb → base_consumer.rb} +19 -11
  14. data/lib/karafka/base_responder.rb +33 -14
  15. data/lib/karafka/callbacks.rb +30 -0
  16. data/lib/karafka/callbacks/config.rb +22 -0
  17. data/lib/karafka/callbacks/dsl.rb +16 -0
  18. data/lib/karafka/cli/install.rb +2 -3
  19. data/lib/karafka/cli/server.rb +0 -1
  20. data/lib/karafka/connection/{consumer.rb → client.rb} +32 -36
  21. data/lib/karafka/connection/config_adapter.rb +14 -6
  22. data/lib/karafka/connection/delegator.rb +46 -0
  23. data/lib/karafka/connection/listener.rb +22 -13
  24. data/lib/karafka/{controllers → consumers}/callbacks.rb +9 -9
  25. data/lib/karafka/consumers/includer.rb +51 -0
  26. data/lib/karafka/consumers/responders.rb +24 -0
  27. data/lib/karafka/{controllers → consumers}/single_params.rb +3 -3
  28. data/lib/karafka/errors.rb +10 -3
  29. data/lib/karafka/fetcher.rb +30 -34
  30. data/lib/karafka/helpers/class_matcher.rb +8 -8
  31. data/lib/karafka/helpers/config_retriever.rb +2 -2
  32. data/lib/karafka/instrumentation/listener.rb +112 -0
  33. data/lib/karafka/instrumentation/logger.rb +55 -0
  34. data/lib/karafka/instrumentation/monitor.rb +64 -0
  35. data/lib/karafka/loader.rb +0 -1
  36. data/lib/karafka/params/{params.rb → dsl.rb} +71 -43
  37. data/lib/karafka/params/params_batch.rb +7 -2
  38. data/lib/karafka/patches/dry_configurable.rb +6 -2
  39. data/lib/karafka/patches/ruby_kafka.rb +10 -10
  40. data/lib/karafka/persistence/client.rb +25 -0
  41. data/lib/karafka/persistence/consumer.rb +27 -14
  42. data/lib/karafka/persistence/topic.rb +29 -0
  43. data/lib/karafka/process.rb +5 -4
  44. data/lib/karafka/responders/builder.rb +15 -14
  45. data/lib/karafka/routing/builder.rb +1 -1
  46. data/lib/karafka/routing/consumer_mapper.rb +3 -2
  47. data/lib/karafka/routing/router.rb +1 -1
  48. data/lib/karafka/routing/topic.rb +5 -5
  49. data/lib/karafka/schemas/config.rb +3 -0
  50. data/lib/karafka/schemas/consumer_group.rb +15 -3
  51. data/lib/karafka/schemas/consumer_group_topic.rb +1 -1
  52. data/lib/karafka/server.rb +37 -5
  53. data/lib/karafka/setup/config.rb +45 -21
  54. data/lib/karafka/setup/configurators/base.rb +6 -12
  55. data/lib/karafka/setup/configurators/params.rb +25 -0
  56. data/lib/karafka/setup/configurators/water_drop.rb +6 -3
  57. data/lib/karafka/setup/dsl.rb +22 -0
  58. data/lib/karafka/templates/{application_controller.rb.example → application_consumer.rb.example} +2 -3
  59. data/lib/karafka/templates/karafka.rb.example +17 -4
  60. data/lib/karafka/version.rb +1 -1
  61. metadata +58 -23
  62. data/lib/karafka/connection/processor.rb +0 -61
  63. data/lib/karafka/controllers/includer.rb +0 -51
  64. data/lib/karafka/controllers/responders.rb +0 -19
  65. data/lib/karafka/logger.rb +0 -53
  66. data/lib/karafka/monitor.rb +0 -98
  67. data/lib/karafka/persistence/controller.rb +0 -38
data/lib/karafka/base_responder.rb
@@ -62,6 +62,11 @@ module Karafka
     # Definitions of all topics that we want to be able to use in this responder should go here
     class_attribute :topics
 
+    # Schema that we can use to control and/or require some additional details upon options
+    # that are being passed to the producer. This can be in particular useful if we want to make
+    # sure that for example partition_key is always present.
+    class_attribute :options_schema
+
     attr_reader :messages_buffer
 
     class << self
@@ -108,7 +113,8 @@ module Karafka
     # UsersCreatedResponder.new(MyParser).call(@created_user)
     def call(*data)
       respond(*data)
-      validate!
+      validate_usage!
+      validate_options!
       deliver!
     end
 
@@ -116,7 +122,7 @@ module Karafka
 
     # Checks if we met all the topics requirements. It will fail if we didn't send a message to
    # a registered required topic, etc.
-    def validate!
+    def validate_usage!
       registered_topics = self.class.topics.map do |name, topic|
         topic.to_h.merge!(
           usage_count: messages_buffer[name]&.count || 0
@@ -138,20 +144,26 @@ module Karafka
       raise Karafka::Errors::InvalidResponderUsage, result.errors
     end
 
+    # Checks if we met all the options requirements before sending them to the producer.
+    def validate_options!
+      return true unless self.class.options_schema
+
+      messages_buffer.each_value do |messages_set|
+        messages_set.each do |message_data|
+          result = self.class.options_schema.call(message_data.last)
+          next if result.success?
+          raise Karafka::Errors::InvalidResponderMessageOptions, result.errors
+        end
+      end
+    end
+
     # Takes all the messages from the buffer and delivers them one by one
     # @note This method is executed after the validation, so we're sure that
     #   what we send is legit and it will go to a proper topics
     def deliver!
-      messages_buffer.each do |topic, data_elements|
-        # We map this topic name, so it will match namespaced/etc topic in Kafka
-        # @note By default will not change topic (if default mapper used)
-        mapped_topic = Karafka::App.config.topic_mapper.outgoing(topic)
-
+      messages_buffer.each_value do |data_elements|
         data_elements.each do |data, options|
-          producer(options).call(
-            data,
-            options.merge(topic: mapped_topic)
-          )
+          producer(options).call(data, options)
         end
       end
     end
@@ -170,10 +182,17 @@ module Karafka
     # @param options [Hash] options for waterdrop (e.g. partition_key)
     # @note Respond to does not accept multiple data arguments.
     def respond_to(topic, data, options = {})
-      Karafka.monitor.notice(self.class, topic: topic, data: data, options: options)
+      # We normalize the format to string, as WaterDrop and Ruby-Kafka support only
+      # string topics
+      topic = topic.to_s
 
-      messages_buffer[topic.to_s] ||= []
-      messages_buffer[topic.to_s] << [@parser_class.generate(data), options]
+      messages_buffer[topic] ||= []
+      messages_buffer[topic] << [
+        @parser_class.generate(data),
+        # We map this topic name, so it will match namespaced/etc topic in Kafka
+        # @note By default will not change topic (if default mapper used)
+        options.merge(topic: Karafka::App.config.topic_mapper.outgoing(topic))
+      ]
    end
 
     # @param options [Hash] options for waterdrop
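Taken together, the base_responder.rb changes above let a responder require specific delivery options for every outgoing message. A minimal sketch of how the new options_schema attribute might be used (the responder name, topic and schema body are illustrative assumptions, not part of this diff; it presumes dry-validation, which Karafka already uses for its own schemas):

# Hypothetical responder requiring a partition_key for each outgoing message
class UsersCreatedResponder < ApplicationResponder
  topic :users_created

  # Validated against the options hash of every buffered message (see validate_options! above)
  self.options_schema = Dry::Validation.Schema do
    required(:partition_key).filled(:str?)
  end

  def respond(user)
    respond_to :users_created, user, partition_key: user['id'].to_s
  end
end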
data/lib/karafka/callbacks.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Additional callbacks that are used to trigger some things in given places during the
+  # system lifecycle
+  # @note Those callbacks aren't the same as consumer callbacks as they are not related to the
+  #   lifecycle of particular messages fetches but rather to the internal flow process.
+  #   They cannot be defined on a consumer callback level because for some of those,
+  #   there aren't consumers in the memory yet and/or they aren't per consumer thread
+  module Callbacks
+    # Types of system callbacks that we have that are not related to consumers
+    TYPES = %i[
+      after_init
+      before_fetch_loop
+    ].freeze
+
+    class << self
+      TYPES.each do |callback_type|
+        # Executes given callbacks set at a given moment with provided arguments
+        define_method callback_type do |*args|
+          Karafka::App
+            .config
+            .callbacks
+            .send(callback_type)
+            .each { |callback| callback.call(*args) }
+        end
+      end
+    end
+  end
+end
data/lib/karafka/callbacks/config.rb
@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Callbacks
+    # Additional configuration required to store procs that we will execute upon callback trigger
+    module Config
+      # Builds up internal callback accumulators
+      # @param klass [Class] Class that we extend with callback config
+      def self.extended(klass)
+        # option internal [Hash] - optional - internal karafka configuration settings that should
+        #   never be changed by users directly
+        klass.setting :callbacks do
+          Callbacks::TYPES.each do |callback_type|
+            # option [Array<Proc>] an array of blocks that will be executed at a given moment
+            #   depending on the callback type
+            setting callback_type, []
+          end
+        end
+      end
+    end
+  end
+end
data/lib/karafka/callbacks/dsl.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Callbacks
+    # App level dsl to define callbacks
+    module Dsl
+      Callbacks::TYPES.each do |callback_type|
+        # Allows us to define a block, that will be executed for a given moment
+        # @param [Block] block that should be executed after the initialization process
+        define_method callback_type do |&block|
+          config.callbacks.send(callback_type).push block
+        end
+      end
+    end
+  end
+end
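Together, these three new files add app-level lifecycle callbacks. A minimal sketch of how an application could register them through the DSL (the app class and block bodies are illustrative assumptions; the before_fetch_loop arguments follow the listener change later in this diff, while the after_init arguments are assumed):

class KarafkaApp < Karafka::App
  setup do |config|
    config.client_id = 'example_app'
  end

  # Runs once, after the framework finishes its initialization
  after_init do |*_args|
    Karafka.logger.info('Karafka booted')
  end

  # Runs per consumer group, right before its fetch loop starts
  before_fetch_loop do |_consumer_group, client|
    # e.g. use client.seek to resume from an externally stored offset
  end
end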
data/lib/karafka/cli/install.rb
@@ -9,8 +9,7 @@ module Karafka
 
       # Directories created by default
       INSTALL_DIRS = %w[
-        app/models
-        app/controllers
+        app/consumers
         app/responders
         config
         log
@@ -20,7 +19,7 @@ module Karafka
       # Where should we map proper files from templates
       INSTALL_FILES_MAP = {
         'karafka.rb.example' => Karafka.boot_file.basename,
-        'application_controller.rb.example' => 'app/controllers/application_controller.rb',
+        'application_consumer.rb.example' => 'app/consumers/application_consumer.rb',
         'application_responder.rb.example' => 'app/responders/application_responder.rb'
       }.freeze
 
data/lib/karafka/cli/server.rb
@@ -35,7 +35,6 @@ module Karafka
         # won't alarm or start new system process up until the current one is finished
         ObjectSpace.define_finalizer(self, proc { send(:clean) })
 
-        # After we fork, we can boot celluloid again
         Karafka::Server.run
       end
 
data/lib/karafka/connection/{consumer.rb → client.rb}
@@ -2,37 +2,54 @@
 
 module Karafka
   module Connection
-    # Class used as a wrapper around Ruby-Kafka to simplify additional
+    # Class used as a wrapper around Ruby-Kafka client to simplify additional
     # features that we provide/might provide in future and to hide the internal implementation
-    class Consumer
-      # Creates a queue consumer that will pull the data from Kafka
+    class Client
+      extend Forwardable
+
+      def_delegator :kafka_consumer, :seek
+
+      # Creates a queue consumer client that will pull the data from Kafka
       # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group for which
       #   we create a client
-      # @return [Karafka::Connection::Consumer] group consumer that can subscribe to
+      # @return [Karafka::Connection::Client] group consumer that can subscribe to
       #   multiple topics
       def initialize(consumer_group)
         @consumer_group = consumer_group
-        Persistence::Consumer.write(self)
+        Persistence::Client.write(self)
       end
 
       # Opens connection, gets messages and calls a block for each of the incoming messages
       # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
       # @note This will yield with raw messages - no preprocessing or reformatting.
       def fetch_loop
-        send(
-          consumer_group.batch_fetching ? :consume_each_batch : :consume_each_message
-        ) { |messages| yield(messages) }
-      rescue Kafka::ProcessingError => e
+        settings = ConfigAdapter.consuming(consumer_group)
+
+        if consumer_group.batch_fetching
+          kafka_consumer.each_batch(*settings) { |batch| yield(batch.messages) }
+        else
+          # always yield an array of messages, so we have consistent API (always a batch)
+          kafka_consumer.each_message(*settings) { |message| yield([message]) }
+        end
+      rescue Kafka::ProcessingError => error
         # If there was an error during consumption, we have to log it, pause current partition
         # and process other things
-        Karafka.monitor.notice_error(self.class, e.cause)
-        pause(e.topic, e.partition)
+        Karafka.monitor.instrument(
+          'connection.client.fetch_loop.error',
+          caller: self,
+          error: error.cause
+        )
+        pause(error.topic, error.partition)
         retry
         # This is on purpose - see the notes for this method
         # rubocop:disable RescueException
-      rescue Exception => e
+      rescue Exception => error
         # rubocop:enable RescueException
-        Karafka.monitor.notice_error(self.class, e)
+        Karafka.monitor.instrument(
+          'connection.client.fetch_loop.error',
+          caller: self,
+          error: error
+        )
        retry
       end
 
@@ -70,32 +87,11 @@ module Karafka
 
       attr_reader :consumer_group
 
-      # Consumes messages from Kafka in batches
-      # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
-      def consume_each_batch
-        kafka_consumer.each_batch(
-          ConfigAdapter.consuming(consumer_group)
-        ) do |batch|
-          yield(batch.messages)
-        end
-      end
-
-      # Consumes messages from Kafka one by one
-      # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
-      def consume_each_message
-        kafka_consumer.each_message(
-          ConfigAdapter.consuming(consumer_group)
-        ) do |message|
-          # always yield an array of messages, so we have consistent API (always a batch)
-          yield([message])
-        end
-      end
-
       # @return [Kafka::Consumer] returns a ready to consume Kafka consumer
       # that is set up to consume from topics of a given consumer group
       def kafka_consumer
         @kafka_consumer ||= kafka.consumer(
-          ConfigAdapter.consumer(consumer_group)
+          *ConfigAdapter.consumer(consumer_group)
        ).tap do |consumer|
           consumer_group.topics.each do |topic|
             consumer.subscribe(*ConfigAdapter.subscription(topic))
@@ -114,7 +110,7 @@ module Karafka
       # @note We don't cache it internally because we cache kafka_consumer that uses kafka
       #   object instance
       def kafka
-        Kafka.new(ConfigAdapter.client(consumer_group))
+        Kafka.new(*ConfigAdapter.client(consumer_group))
       end
     end
   end
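Client errors in the fetch loop are now published through the new instrumentation layer as 'connection.client.fetch_loop.error' instead of the removed Karafka.monitor.notice_error API. A hedged sketch of hooking into that event, assuming the dry-monitor style #subscribe exposed by the new Karafka::Instrumentation::Monitor added in this release:

Karafka.monitor.subscribe('connection.client.fetch_loop.error') do |event|
  # The payload carries the :caller and :error keys instrumented in the client
  Karafka.logger.error("Fetch loop failure: #{event[:error]}")
end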
data/lib/karafka/connection/config_adapter.rb
@@ -14,7 +14,10 @@ module Karafka
       class << self
         # Builds all the configuration settings for Kafka.new method
         # @param _consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
-        # @return [Hash] hash with all the settings required by Kafka.new method
+        # @return [Array<Hash>] Array with all the client arguments including hash with all
+        #   the settings required by Kafka.new method
+        # @note We return array, so we can inject any arguments we want, in case of changes in the
+        #   raw driver
         def client(_consumer_group)
           # This one is a default that takes all the settings except special
           # cases defined in the map
@@ -33,28 +36,33 @@ module Karafka
             settings[setting_name] = setting_value
           end
 
-          sanitize(settings)
+          settings_hash = sanitize(settings)
+
+          # Normalization for the way Kafka::Client accepts arguments from 0.5.3
+          [settings_hash.delete(:seed_brokers), settings_hash]
         end
 
         # Builds all the configuration settings for kafka#consumer method
         # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
-        # @return [Hash] hash with all the settings required by Kafka#consumer method
+        # @return [Array<Hash>] array with all the consumer arguments including hash with all
+        #   the settings required by Kafka#consumer
         def consumer(consumer_group)
           settings = { group_id: consumer_group.id }
           settings = fetch_for(:consumer, consumer_group, settings)
-          sanitize(settings)
+          [sanitize(settings)]
         end
 
         # Builds all the configuration settings for kafka consumer consume_each_batch and
        #   consume_each_message methods
         # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
-        # @return [Hash] hash with all the settings required by
+        # @return [Array<Hash>] Array with all the arguments required by consuming method
+        #   including hash with all the settings required by
         #   Kafka::Consumer#consume_each_message and Kafka::Consumer#consume_each_batch method
         def consuming(consumer_group)
           settings = {
             automatically_mark_as_processed: consumer_group.automatically_mark_as_consumed
           }
-          sanitize(fetch_for(:consuming, consumer_group, settings))
+          [sanitize(fetch_for(:consuming, consumer_group, settings))]
         end
 
         # Builds all the configuration settings for kafka consumer#subscribe method
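As the @note lines explain, each adapter method now returns an argument array so it can be splatted straight into ruby-kafka, which from 0.5.3 takes the seed brokers as a positional argument. A small illustration of the new return shape; the broker URL and settings values are assumptions:

# Illustrative only: values are assumptions, the shapes follow this diff
seed_brokers, settings = ConfigAdapter.client(consumer_group)
# seed_brokers => ['kafka://127.0.0.1:9092']
# settings     => { client_id: 'example_app', ... }

# Client#kafka and Client#kafka_consumer splat these arrays back into the driver:
Kafka.new(*ConfigAdapter.client(consumer_group))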
data/lib/karafka/connection/delegator.rb
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Class that delegates processing of messages for which we listen to a proper processor
+    module Delegator
+      class << self
+        # Delegates messages (does something with them)
+        # It will either schedule or run a proper processor action for messages
+        # @note This should be looped to obtain a constant delegating of new messages
+        # @note We catch all the errors here, to make sure that none failures
+        #   for a given consumption will affect other consumed messages
+        #   If we wouldn't catch it, it would propagate up until killing the thread
+        # @note It is a one huge method, because of performance reasons. It is much faster then
+        #   using send or invoking additional methods
+        # @param group_id [String] group_id of a group from which a given message came
+        # @param kafka_messages [Array<Kafka::FetchedMessage>] raw messages fetched from kafka
+        def call(group_id, kafka_messages)
+          # @note We always get messages by topic and partition so we can take topic from the
+          #   first one and it will be valid for all the messages
+          topic = Persistence::Topic.fetch(group_id, kafka_messages[0].topic)
+          consumer = Persistence::Consumer.fetch(topic, kafka_messages[0].partition)
+
+          Karafka.monitor.instrument(
+            'connection.delegator.call',
+            caller: self,
+            consumer: consumer,
+            kafka_messages: kafka_messages
+          ) do
+            # Depending on a case (persisted or not) we might use new consumer instance per
+            #   each batch, or use the same one for all of them (for implementing buffering, etc.)
+            if topic.batch_consuming
+              consumer.params_batch = kafka_messages
+              consumer.call
+            else
+              kafka_messages.each do |kafka_message|
+                consumer.params_batch = [kafka_message]
+                consumer.call
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end
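The delegator expects consumers to expose params_batch= and #call, which the renamed consumer classes in this release provide. A minimal sketch of a matching consumer; the class name and body are assumptions, and it presumes the 1.2 convention of implementing #consume:

class UsersCreatedConsumer < ApplicationConsumer
  # Invoked via #call by the delegator above, once params_batch has been assigned
  def consume
    params_batch.each do |params|
      # hypothetical processing of a single message payload
      Karafka.logger.info("Creating user: #{params['user']}")
    end
  end
end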
data/lib/karafka/connection/listener.rb
@@ -7,8 +7,6 @@ module Karafka
     # @note Listener itself does nothing with the message - it will return to the block
     #   a raw Kafka::FetchedMessage
     class Listener
-      attr_reader :consumer_group
-
       # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group that holds details
       #   on what topics and with what settings should we listen
       # @return [Karafka::Connection::Listener] listener instance
@@ -16,6 +14,17 @@ module Karafka
         @consumer_group = consumer_group
       end
 
+      # Runs prefetch callbacks and executes the main listener fetch loop
+      def call
+        Karafka::Callbacks.before_fetch_loop(
+          @consumer_group,
+          client
+        )
+        fetch_loop
+      end
+
+      private
+
       # Opens connection, gets messages and calls a block for each of the incoming messages
       # @yieldparam [String] consumer group id
       # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
@@ -26,25 +35,25 @@ module Karafka
       # won't crash the whole cluster. Here we mostly focus on catchin the exceptions related to
       # Kafka connections / Internet connection issues / Etc. Business logic problems should not
       # propagate this far
-      def fetch_loop(block)
-        consumer.fetch_loop do |raw_messages|
-          block.call(consumer_group.id, raw_messages)
+      def fetch_loop
+        client.fetch_loop do |raw_messages|
+          # @note What happens here is a delegation of processing to a proper processor based
+          #   on the incoming messages characteristics
+          Karafka::Connection::Delegator.call(@consumer_group.id, raw_messages)
         end
         # This is on purpose - see the notes for this method
         # rubocop:disable RescueException
       rescue Exception => e
+        Karafka.monitor.instrument('connection.listener.fetch_loop.error', caller: self, error: e)
         # rubocop:enable RescueException
-        Karafka.monitor.notice_error(self.class, e)
-        @consumer&.stop
-        retry if @consumer
+        @client&.stop
+        retry if @client
       end
 
-      private
-
-      # @return [Karafka::Connection::Consumer] wrapped kafka consumer for a given topic
+      # @return [Karafka::Connection::Client] wrapped kafka consuming client for a given topic
       #   consumption
-      def consumer
-        @consumer ||= Consumer.new(consumer_group)
+      def client
+        @client ||= Client.new(@consumer_group)
       end
     end
   end