karafka 1.1.0 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. checksums.yaml +5 -5
  2. data/.ruby-version +1 -1
  3. data/.travis.yml +1 -0
  4. data/CHANGELOG.md +46 -2
  5. data/CONTRIBUTING.md +5 -6
  6. data/Gemfile +1 -2
  7. data/Gemfile.lock +41 -29
  8. data/README.md +13 -19
  9. data/karafka.gemspec +6 -4
  10. data/lib/karafka.rb +17 -7
  11. data/lib/karafka/app.rb +8 -15
  12. data/lib/karafka/attributes_map.rb +1 -1
  13. data/lib/karafka/backends/inline.rb +1 -2
  14. data/lib/karafka/{base_controller.rb → base_consumer.rb} +19 -11
  15. data/lib/karafka/base_responder.rb +34 -15
  16. data/lib/karafka/callbacks.rb +30 -0
  17. data/lib/karafka/callbacks/config.rb +22 -0
  18. data/lib/karafka/callbacks/dsl.rb +16 -0
  19. data/lib/karafka/cli/install.rb +2 -3
  20. data/lib/karafka/cli/server.rb +0 -1
  21. data/lib/karafka/connection/{consumer.rb → client.rb} +32 -36
  22. data/lib/karafka/connection/config_adapter.rb +14 -6
  23. data/lib/karafka/connection/delegator.rb +46 -0
  24. data/lib/karafka/connection/listener.rb +22 -13
  25. data/lib/karafka/{controllers → consumers}/callbacks.rb +9 -9
  26. data/lib/karafka/consumers/includer.rb +51 -0
  27. data/lib/karafka/consumers/responders.rb +24 -0
  28. data/lib/karafka/{controllers → consumers}/single_params.rb +3 -3
  29. data/lib/karafka/errors.rb +10 -3
  30. data/lib/karafka/fetcher.rb +30 -34
  31. data/lib/karafka/helpers/class_matcher.rb +8 -8
  32. data/lib/karafka/helpers/config_retriever.rb +2 -2
  33. data/lib/karafka/instrumentation/listener.rb +112 -0
  34. data/lib/karafka/instrumentation/logger.rb +55 -0
  35. data/lib/karafka/instrumentation/monitor.rb +64 -0
  36. data/lib/karafka/loader.rb +0 -1
  37. data/lib/karafka/params/{params.rb → dsl.rb} +71 -43
  38. data/lib/karafka/params/params_batch.rb +7 -2
  39. data/lib/karafka/patches/dry_configurable.rb +6 -2
  40. data/lib/karafka/patches/ruby_kafka.rb +10 -10
  41. data/lib/karafka/persistence/client.rb +25 -0
  42. data/lib/karafka/persistence/consumer.rb +27 -14
  43. data/lib/karafka/persistence/topic.rb +29 -0
  44. data/lib/karafka/process.rb +5 -4
  45. data/lib/karafka/responders/builder.rb +15 -14
  46. data/lib/karafka/routing/builder.rb +1 -1
  47. data/lib/karafka/routing/consumer_mapper.rb +3 -2
  48. data/lib/karafka/routing/router.rb +1 -1
  49. data/lib/karafka/routing/topic.rb +5 -11
  50. data/lib/karafka/schemas/config.rb +3 -0
  51. data/lib/karafka/schemas/consumer_group.rb +15 -3
  52. data/lib/karafka/schemas/consumer_group_topic.rb +1 -1
  53. data/lib/karafka/server.rb +37 -5
  54. data/lib/karafka/setup/config.rb +47 -21
  55. data/lib/karafka/setup/configurators/base.rb +6 -12
  56. data/lib/karafka/setup/configurators/params.rb +25 -0
  57. data/lib/karafka/setup/configurators/water_drop.rb +6 -3
  58. data/lib/karafka/setup/dsl.rb +22 -0
  59. data/lib/karafka/templates/{application_controller.rb.example → application_consumer.rb.example} +2 -3
  60. data/lib/karafka/templates/karafka.rb.example +17 -4
  61. data/lib/karafka/version.rb +1 -1
  62. metadata +58 -24
  63. data/.github/ISSUE_TEMPLATE.md +0 -2
  64. data/lib/karafka/connection/processor.rb +0 -61
  65. data/lib/karafka/controllers/includer.rb +0 -51
  66. data/lib/karafka/controllers/responders.rb +0 -19
  67. data/lib/karafka/logger.rb +0 -53
  68. data/lib/karafka/monitor.rb +0 -98
  69. data/lib/karafka/persistence/controller.rb +0 -38
@@ -21,7 +21,7 @@ module Karafka
21
21
  offset_retention_time heartbeat_interval
22
22
  ],
23
23
  subscription: %i[start_from_beginning max_bytes_per_partition],
24
- consuming: %i[min_bytes max_wait_time],
24
+ consuming: %i[min_bytes max_bytes max_wait_time],
25
25
  pausing: %i[pause_timeout],
26
26
  # All the options that are under kafka config namespace, but are not used
27
27
  # directly with kafka api, but from the Karafka user perspective, they are
@@ -9,8 +9,7 @@ module Karafka
9
9
 
10
10
  # Executes consume code immediately (without enqueuing)
11
11
  def process
12
- Karafka.monitor.notice(self.class, params_batch)
13
- consume
12
+ Karafka.monitor.instrument('backends.inline.process', caller: self) { consume }
14
13
  end
15
14
  end
16
15
  end
@@ -2,24 +2,32 @@
2
2
 
3
3
  # Karafka module namespace
4
4
  module Karafka
5
- # Base controller from which all Karafka controllers should inherit
6
- class BaseController
5
+ # Base consumer from which all Karafka consumers should inherit
6
+ class BaseConsumer
7
7
  extend ActiveSupport::DescendantsTracker
8
+ extend Forwardable
9
+
10
+ # Allows us to mark messages as consumed for non-automatic mode without having
11
+ # to use consumer client directly. We do it this way, because most people should not
12
+ # mess with the client instance directly (just in case)
13
+ def_delegator :client, :mark_as_consumed
14
+
15
+ private :mark_as_consumed
8
16
 
9
17
  class << self
10
18
  attr_reader :topic
11
19
 
12
- # Assigns a topic to a controller and build up proper controller functionalities, so it can
13
- # cooperate with the topic settings
20
+ # Assigns a topic to a consumer and builds up proper consumer functionalities
21
+ # so that it can cooperate with the topic settings
14
22
  # @param topic [Karafka::Routing::Topic]
15
23
  # @return [Karafka::Routing::Topic] assigned topic
16
24
  def topic=(topic)
17
25
  @topic = topic
18
- Controllers::Includer.call(self)
26
+ Consumers::Includer.call(self)
19
27
  end
20
28
  end
21
29
 
22
- # @return [Karafka::Routing::Topic] topic to which a given controller is subscribed
30
+ # @return [Karafka::Routing::Topic] topic to which a given consumer is subscribed
23
31
  def topic
24
32
  self.class.topic
25
33
  end
@@ -33,20 +41,20 @@ module Karafka
33
41
  @params_batch = Karafka::Params::ParamsBatch.new(messages, topic.parser)
34
42
  end
35
43
 
36
- # Executes the default controller flow.
44
+ # Executes the default consumer flow.
37
45
  def call
38
46
  process
39
47
  end
40
48
 
41
49
  private
42
50
 
43
- # We make it private as it should be accesible only from the inside of a controller
51
+ # We make it private as it should be accessible only from the inside of a consumer
44
52
  attr_reader :params_batch
45
53
 
46
- # @return [Karafka::Connection::Consumer] messages consumer that can be used to
54
+ # @return [Karafka::Connection::Client] messages consuming client that can be used to
47
55
  # commit manually offset or pause / stop consumer based on the business logic
48
- def consumer
49
- Persistence::Consumer.read
56
+ def client
57
+ Persistence::Client.read
50
58
  end
51
59
 
52
60
  # Method that will perform business logic on data received from Kafka (it will consume
@@ -62,6 +62,11 @@ module Karafka
62
62
  # Definitions of all topics that we want to be able to use in this responder should go here
63
63
  class_attribute :topics
64
64
 
65
+ # Schema that we can use to control and/or require some additional details upon options
66
+ # that are being passed to the producer. This can be in particular useful if we want to make
67
+ # sure that for example partition_key is always present.
68
+ class_attribute :options_schema
69
+
65
70
  attr_reader :messages_buffer
66
71
 
67
72
  class << self
@@ -92,7 +97,7 @@ module Karafka
92
97
  # @param parser_class [Class] parser class that we can use to generate appropriate string
93
98
  # or nothing if we want to default to Karafka::Parsers::Json
94
99
  # @return [Karafka::BaseResponder] base responder descendant responder
95
- def initialize(parser_class = Karafka::Parsers::Json)
100
+ def initialize(parser_class = Karafka::App.config.parser)
96
101
  @parser_class = parser_class
97
102
  @messages_buffer = {}
98
103
  end
@@ -108,7 +113,8 @@ module Karafka
108
113
  # UsersCreatedResponder.new(MyParser).call(@created_user)
109
114
  def call(*data)
110
115
  respond(*data)
111
- validate!
116
+ validate_usage!
117
+ validate_options!
112
118
  deliver!
113
119
  end
114
120
 
@@ -116,7 +122,7 @@ module Karafka
116
122
 
117
123
  # Checks if we met all the topics requirements. It will fail if we didn't send a message to
118
124
  # a registered required topic, etc.
119
- def validate!
125
+ def validate_usage!
120
126
  registered_topics = self.class.topics.map do |name, topic|
121
127
  topic.to_h.merge!(
122
128
  usage_count: messages_buffer[name]&.count || 0
@@ -138,20 +144,26 @@ module Karafka
138
144
  raise Karafka::Errors::InvalidResponderUsage, result.errors
139
145
  end
140
146
 
147
+ # Checks if we met all the options requirements before sending them to the producer.
148
+ def validate_options!
149
+ return true unless self.class.options_schema
150
+
151
+ messages_buffer.each_value do |messages_set|
152
+ messages_set.each do |message_data|
153
+ result = self.class.options_schema.call(message_data.last)
154
+ next if result.success?
155
+ raise Karafka::Errors::InvalidResponderMessageOptions, result.errors
156
+ end
157
+ end
158
+ end
159
+
141
160
  # Takes all the messages from the buffer and delivers them one by one
142
161
  # @note This method is executed after the validation, so we're sure that
143
162
  # what we send is legit and it will go to the proper topics
144
163
  def deliver!
145
- messages_buffer.each do |topic, data_elements|
146
- # We map this topic name, so it will match namespaced/etc topic in Kafka
147
- # @note By default will not change topic (if default mapper used)
148
- mapped_topic = Karafka::App.config.topic_mapper.outgoing(topic)
149
-
164
+ messages_buffer.each_value do |data_elements|
150
165
  data_elements.each do |data, options|
151
- producer(options).call(
152
- data,
153
- options.merge(topic: mapped_topic)
154
- )
166
+ producer(options).call(data, options)
155
167
  end
156
168
  end
157
169
  end
@@ -170,10 +182,17 @@ module Karafka
170
182
  # @param options [Hash] options for waterdrop (e.g. partition_key)
171
183
  # @note Respond to does not accept multiple data arguments.
172
184
  def respond_to(topic, data, options = {})
173
- Karafka.monitor.notice(self.class, topic: topic, data: data, options: options)
185
+ # We normalize the format to string, as WaterDrop and Ruby-Kafka support only
186
+ # string topics
187
+ topic = topic.to_s
174
188
 
175
- messages_buffer[topic.to_s] ||= []
176
- messages_buffer[topic.to_s] << [@parser_class.generate(data), options]
189
+ messages_buffer[topic] ||= []
190
+ messages_buffer[topic] << [
191
+ @parser_class.generate(data),
192
+ # We map this topic name, so it will match namespaced/etc topic in Kafka
193
+ # @note By default will not change topic (if default mapper used)
194
+ options.merge(topic: Karafka::App.config.topic_mapper.outgoing(topic))
195
+ ]
177
196
  end
178
197
 
179
198
  # @param options [Hash] options for waterdrop
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ # Additional callbacks that are used to trigger some things in given places during the
5
+ # system lifecycle
6
+ # @note Those callbacks aren't the same as consumer callbacks as they are not related to the
7
+ # lifecycle of particular messages fetches but rather to the internal flow process.
8
+ # They cannot be defined on a consumer callback level because for some of those,
9
+ # there aren't consumers in the memory yet and/or they aren't per consumer thread
10
+ module Callbacks
11
+ # Types of system callbacks that we have that are not related to consumers
12
+ TYPES = %i[
13
+ after_init
14
+ before_fetch_loop
15
+ ].freeze
16
+
17
+ class << self
18
+ TYPES.each do |callback_type|
19
+ # Executes given callbacks set at a given moment with provided arguments
20
+ define_method callback_type do |*args|
21
+ Karafka::App
22
+ .config
23
+ .callbacks
24
+ .send(callback_type)
25
+ .each { |callback| callback.call(*args) }
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Callbacks
5
+ # Additional configuration required to store procs that we will execute upon callback trigger
6
+ module Config
7
+ # Builds up internal callback accumulators
8
+ # @param klass [Class] Class that we extend with callback config
9
+ def self.extended(klass)
10
+ # option internal [Hash] - optional - internal karafka configuration settings that should
11
+ # never be changed by users directly
12
+ klass.setting :callbacks do
13
+ Callbacks::TYPES.each do |callback_type|
14
+ # option [Array<Proc>] an array of blocks that will be executed at a given moment
15
+ # depending on the callback type
16
+ setting callback_type, []
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Callbacks
5
+ # App level dsl to define callbacks
6
+ module Dsl
7
+ Callbacks::TYPES.each do |callback_type|
8
+ # Allows us to define a block, that will be executed for a given moment
9
+ # @param [Block] block that should be executed after the initialization process
10
+ define_method callback_type do |&block|
11
+ config.callbacks.send(callback_type).push block
12
+ end
13
+ end
14
+ end
15
+ end
16
+ end
@@ -9,8 +9,7 @@ module Karafka
9
9
 
10
10
  # Directories created by default
11
11
  INSTALL_DIRS = %w[
12
- app/models
13
- app/controllers
12
+ app/consumers
14
13
  app/responders
15
14
  config
16
15
  log
@@ -20,7 +19,7 @@ module Karafka
20
19
  # Where should we map proper files from templates
21
20
  INSTALL_FILES_MAP = {
22
21
  'karafka.rb.example' => Karafka.boot_file.basename,
23
- 'application_controller.rb.example' => 'app/controllers/application_controller.rb',
22
+ 'application_consumer.rb.example' => 'app/consumers/application_consumer.rb',
24
23
  'application_responder.rb.example' => 'app/responders/application_responder.rb'
25
24
  }.freeze
26
25
 
@@ -35,7 +35,6 @@ module Karafka
35
35
  # won't alarm or start new system process up until the current one is finished
36
36
  ObjectSpace.define_finalizer(self, proc { send(:clean) })
37
37
 
38
- # After we fork, we can boot celluloid again
39
38
  Karafka::Server.run
40
39
  end
41
40
 
@@ -2,37 +2,54 @@
2
2
 
3
3
  module Karafka
4
4
  module Connection
5
- # Class used as a wrapper around Ruby-Kafka to simplify additional
5
+ # Class used as a wrapper around Ruby-Kafka client to simplify additional
6
6
  # features that we provide/might provide in future and to hide the internal implementation
7
- class Consumer
8
- # Creates a queue consumer that will pull the data from Kafka
7
+ class Client
8
+ extend Forwardable
9
+
10
+ def_delegator :kafka_consumer, :seek
11
+
12
+ # Creates a queue consumer client that will pull the data from Kafka
9
13
  # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group for which
10
14
  # we create a client
11
- # @return [Karafka::Connection::Consumer] group consumer that can subscribe to
15
+ # @return [Karafka::Connection::Client] group consumer that can subscribe to
12
16
  # multiple topics
13
17
  def initialize(consumer_group)
14
18
  @consumer_group = consumer_group
15
- Persistence::Consumer.write(self)
19
+ Persistence::Client.write(self)
16
20
  end
17
21
 
18
22
  # Opens connection, gets messages and calls a block for each of the incoming messages
19
23
  # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
20
24
  # @note This will yield with raw messages - no preprocessing or reformatting.
21
25
  def fetch_loop
22
- send(
23
- consumer_group.batch_fetching ? :consume_each_batch : :consume_each_message
24
- ) { |messages| yield(messages) }
25
- rescue Kafka::ProcessingError => e
26
+ settings = ConfigAdapter.consuming(consumer_group)
27
+
28
+ if consumer_group.batch_fetching
29
+ kafka_consumer.each_batch(*settings) { |batch| yield(batch.messages) }
30
+ else
31
+ # always yield an array of messages, so we have consistent API (always a batch)
32
+ kafka_consumer.each_message(*settings) { |message| yield([message]) }
33
+ end
34
+ rescue Kafka::ProcessingError => error
26
35
  # If there was an error during consumption, we have to log it, pause current partition
27
36
  # and process other things
28
- Karafka.monitor.notice_error(self.class, e.cause)
29
- pause(e.topic, e.partition)
37
+ Karafka.monitor.instrument(
38
+ 'connection.client.fetch_loop.error',
39
+ caller: self,
40
+ error: error.cause
41
+ )
42
+ pause(error.topic, error.partition)
30
43
  retry
31
44
  # This is on purpose - see the notes for this method
32
45
  # rubocop:disable RescueException
33
- rescue Exception => e
46
+ rescue Exception => error
34
47
  # rubocop:enable RescueException
35
- Karafka.monitor.notice_error(self.class, e)
48
+ Karafka.monitor.instrument(
49
+ 'connection.client.fetch_loop.error',
50
+ caller: self,
51
+ error: error
52
+ )
36
53
  retry
37
54
  end
38
55
 
@@ -70,32 +87,11 @@ module Karafka
70
87
 
71
88
  attr_reader :consumer_group
72
89
 
73
- # Consumes messages from Kafka in batches
74
- # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
75
- def consume_each_batch
76
- kafka_consumer.each_batch(
77
- ConfigAdapter.consuming(consumer_group)
78
- ) do |batch|
79
- yield(batch.messages)
80
- end
81
- end
82
-
83
- # Consumes messages from Kafka one by one
84
- # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
85
- def consume_each_message
86
- kafka_consumer.each_message(
87
- ConfigAdapter.consuming(consumer_group)
88
- ) do |message|
89
- # always yield an array of messages, so we have consistent API (always a batch)
90
- yield([message])
91
- end
92
- end
93
-
94
90
  # @return [Kafka::Consumer] returns a ready to consume Kafka consumer
95
91
  # that is set up to consume from topics of a given consumer group
96
92
  def kafka_consumer
97
93
  @kafka_consumer ||= kafka.consumer(
98
- ConfigAdapter.consumer(consumer_group)
94
+ *ConfigAdapter.consumer(consumer_group)
99
95
  ).tap do |consumer|
100
96
  consumer_group.topics.each do |topic|
101
97
  consumer.subscribe(*ConfigAdapter.subscription(topic))
@@ -114,7 +110,7 @@ module Karafka
114
110
  # @note We don't cache it internally because we cache kafka_consumer that uses kafka
115
111
  # object instance
116
112
  def kafka
117
- Kafka.new(ConfigAdapter.client(consumer_group))
113
+ Kafka.new(*ConfigAdapter.client(consumer_group))
118
114
  end
119
115
  end
120
116
  end
@@ -14,7 +14,10 @@ module Karafka
14
14
  class << self
15
15
  # Builds all the configuration settings for Kafka.new method
16
16
  # @param _consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
17
- # @return [Hash] hash with all the settings required by Kafka.new method
17
+ # @return [Array<Hash>] Array with all the client arguments including hash with all
18
+ # the settings required by Kafka.new method
19
+ # @note We return array, so we can inject any arguments we want, in case of changes in the
20
+ # raw driver
18
21
  def client(_consumer_group)
19
22
  # This one is a default that takes all the settings except special
20
23
  # cases defined in the map
@@ -33,28 +36,33 @@ module Karafka
33
36
  settings[setting_name] = setting_value
34
37
  end
35
38
 
36
- sanitize(settings)
39
+ settings_hash = sanitize(settings)
40
+
41
+ # Normalization for the way Kafka::Client accepts arguments from 0.5.3
42
+ [settings_hash.delete(:seed_brokers), settings_hash]
37
43
  end
38
44
 
39
45
  # Builds all the configuration settings for kafka#consumer method
40
46
  # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
41
- # @return [Hash] hash with all the settings required by Kafka#consumer method
47
+ # @return [Array<Hash>] array with all the consumer arguments including hash with all
48
+ # the settings required by Kafka#consumer
42
49
  def consumer(consumer_group)
43
50
  settings = { group_id: consumer_group.id }
44
51
  settings = fetch_for(:consumer, consumer_group, settings)
45
- sanitize(settings)
52
+ [sanitize(settings)]
46
53
  end
47
54
 
48
55
  # Builds all the configuration settings for kafka consumer consume_each_batch and
49
56
  # consume_each_message methods
50
57
  # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
51
- # @return [Hash] hash with all the settings required by
58
+ # @return [Array<Hash>] Array with all the arguments required by consuming method
59
+ # including hash with all the settings required by
52
60
  # Kafka::Consumer#consume_each_message and Kafka::Consumer#consume_each_batch method
53
61
  def consuming(consumer_group)
54
62
  settings = {
55
63
  automatically_mark_as_processed: consumer_group.automatically_mark_as_consumed
56
64
  }
57
- sanitize(fetch_for(:consuming, consumer_group, settings))
65
+ [sanitize(fetch_for(:consuming, consumer_group, settings))]
58
66
  end
59
67
 
60
68
  # Builds all the configuration settings for kafka consumer#subscribe method
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Connection
5
+ # Module that delegates processing of messages for which we listen to a proper processor
6
+ module Delegator
7
+ class << self
8
+ # Delegates messages (does something with them)
9
+ # It will either schedule or run a proper processor action for messages
10
+ # @note This should be looped to obtain a constant delegating of new messages
11
+ # @note We catch all the errors here, to make sure that no failures
12
+ # for a given consumption will affect other consumed messages
13
+ # If we wouldn't catch it, it would propagate up until killing the thread
14
+ # @note It is one huge method, for performance reasons. It is much faster than
15
+ # using send or invoking additional methods
16
+ # @param group_id [String] group_id of a group from which a given message came
17
+ # @param kafka_messages [Array<Kafka::FetchedMessage>] raw messages fetched from kafka
18
+ def call(group_id, kafka_messages)
19
+ # @note We always get messages by topic and partition so we can take topic from the
20
+ # first one and it will be valid for all the messages
21
+ topic = Persistence::Topic.fetch(group_id, kafka_messages[0].topic)
22
+ consumer = Persistence::Consumer.fetch(topic, kafka_messages[0].partition)
23
+
24
+ Karafka.monitor.instrument(
25
+ 'connection.delegator.call',
26
+ caller: self,
27
+ consumer: consumer,
28
+ kafka_messages: kafka_messages
29
+ ) do
30
+ # Depending on a case (persisted or not) we might use new consumer instance per
31
+ # each batch, or use the same one for all of them (for implementing buffering, etc.)
32
+ if topic.batch_consuming
33
+ consumer.params_batch = kafka_messages
34
+ consumer.call
35
+ else
36
+ kafka_messages.each do |kafka_message|
37
+ consumer.params_batch = [kafka_message]
38
+ consumer.call
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end