karafka 2.0.0.alpha5 → 2.0.0.beta2

Files changed (54)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.ruby-version +1 -1
  4. data/CHANGELOG.md +35 -2
  5. data/Gemfile.lock +6 -6
  6. data/bin/integrations +55 -43
  7. data/config/errors.yml +1 -0
  8. data/docker-compose.yml +4 -1
  9. data/lib/active_job/karafka.rb +2 -2
  10. data/lib/karafka/active_job/routing/extensions.rb +21 -0
  11. data/lib/karafka/base_consumer.rb +65 -12
  12. data/lib/karafka/connection/client.rb +36 -6
  13. data/lib/karafka/connection/listener.rb +92 -27
  14. data/lib/karafka/connection/listeners_batch.rb +24 -0
  15. data/lib/karafka/connection/messages_buffer.rb +49 -22
  16. data/lib/karafka/connection/pauses_manager.rb +2 -2
  17. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  18. data/lib/karafka/connection/rebalance_manager.rb +35 -20
  19. data/lib/karafka/contracts/config.rb +8 -0
  20. data/lib/karafka/helpers/async.rb +33 -0
  21. data/lib/karafka/instrumentation/monitor.rb +2 -1
  22. data/lib/karafka/messages/batch_metadata.rb +26 -3
  23. data/lib/karafka/messages/builders/batch_metadata.rb +17 -29
  24. data/lib/karafka/messages/builders/message.rb +1 -0
  25. data/lib/karafka/messages/builders/messages.rb +4 -12
  26. data/lib/karafka/pro/active_job/consumer.rb +21 -0
  27. data/lib/karafka/pro/active_job/dispatcher.rb +10 -10
  28. data/lib/karafka/pro/active_job/job_options_contract.rb +9 -9
  29. data/lib/karafka/pro/loader.rb +17 -8
  30. data/lib/karafka/pro/performance_tracker.rb +80 -0
  31. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +38 -0
  32. data/lib/karafka/pro/scheduler.rb +54 -0
  33. data/lib/karafka/processing/executor.rb +19 -11
  34. data/lib/karafka/processing/executors_buffer.rb +15 -7
  35. data/lib/karafka/processing/jobs/base.rb +28 -0
  36. data/lib/karafka/processing/jobs/consume.rb +11 -4
  37. data/lib/karafka/processing/jobs_queue.rb +28 -16
  38. data/lib/karafka/processing/worker.rb +30 -9
  39. data/lib/karafka/processing/workers_batch.rb +5 -0
  40. data/lib/karafka/railtie.rb +12 -0
  41. data/lib/karafka/routing/consumer_group.rb +1 -1
  42. data/lib/karafka/routing/subscription_group.rb +1 -1
  43. data/lib/karafka/routing/subscription_groups_builder.rb +3 -2
  44. data/lib/karafka/routing/topics.rb +38 -0
  45. data/lib/karafka/runner.rb +19 -27
  46. data/lib/karafka/scheduler.rb +20 -0
  47. data/lib/karafka/server.rb +24 -23
  48. data/lib/karafka/setup/config.rb +4 -1
  49. data/lib/karafka/time_trackers/pause.rb +10 -2
  50. data/lib/karafka/version.rb +1 -1
  51. data.tar.gz.sig +0 -0
  52. metadata +13 -4
  53. metadata.gz.sig +0 -0
  54. data/lib/karafka/active_job/routing_extensions.rb +0 -18
data/lib/karafka/connection/listener.rb

@@ -3,9 +3,13 @@
 module Karafka
   module Connection
     # A single listener that listens to incoming messages from a single subscription group.
-    # It polls the messages and then enqueues. It also takes care of potential recovery from
+    # It polls the messages and then enqueues jobs. It also takes care of potential recovery from
     # critical errors by restarting everything in a safe manner.
+    #
+    # This is the heart of the consumption process.
     class Listener
+      include Helpers::Async
+
       # @param subscription_group [Karafka::Routing::SubscriptionGroup]
       # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
       # @return [Karafka::Connection::Listener] listener instance
@@ -15,6 +19,12 @@ module Karafka
         @pauses_manager = PausesManager.new
         @client = Client.new(@subscription_group)
         @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
+        # We reference scheduler here as it is much faster than fetching this each time
+        @scheduler = ::Karafka::App.config.internal.scheduler
+        # We keep one buffer for messages to preserve memory and not allocate extra objects
+        # We can do this that way because we always first schedule jobs using messages before we
+        # fetch another batch.
+        @messages_buffer = MessagesBuffer.new(subscription_group)
       end

       # Runs the main listener fetch loop.
@@ -51,33 +61,55 @@ module Karafka
           )

           resume_paused_partitions
+
           # We need to fetch data before we revoke lost partitions details as during the polling
           # the callbacks for tracking lost partitions are triggered. Otherwise we would always be
           # one batch behind.
-          messages_buffer = @client.batch_poll
+          poll_and_remap_messages

           Karafka.monitor.instrument(
             'connection.listener.fetch_loop.received',
             caller: self,
-            messages_buffer: messages_buffer
+            messages_buffer: @messages_buffer
           )

           # If there were revoked partitions, we need to wait on their jobs to finish before
           # distributing consuming jobs as upon revoking, we might get assigned to the same
           # partitions, thus getting their jobs. The revoking jobs need to finish before
           # appropriate consumers are taken down and re-created
-          wait(@subscription_group) if distribute_revoke_lost_partitions_jobs
-
-          distribute_partitions_jobs(messages_buffer)
+          build_and_schedule_revoke_lost_partitions_jobs

           # We wait only on jobs from our subscription group. Other groups are independent.
-          wait(@subscription_group)
+          wait
+
+          build_and_schedule_consumption_jobs
+
+          wait

           # We don't use the `#commit_offsets!` here for performance reasons. This can be achieved
           # if needed by using manual offset management.
           @client.commit_offsets
         end

+        # If we are stopping we will no longer schedule any jobs despite polling.
+        # We need to keep polling not to exceed the `max.poll.interval` for long-running
+        # non-blocking jobs and we need to allow them to finish. We however do not want to
+        # enqueue any new jobs. It's worth keeping in mind that it is the end user's
+        # responsibility to detect shutdown in their long-running logic, or else Karafka will
+        # force a shutdown after a while.
+        #
+        # We do not care about resuming any partitions or lost jobs as we do not plan to do
+        # anything with them as we're in the shutdown phase.
+        wait_with_poll
+
+        # We do not want to schedule the shutdown jobs prior to finishing all the jobs
+        # (including non-blocking) as there might be a long-running job with a shutdown and then
+        # we would run two jobs in parallel for the same executor and consumer. We do not want
+        # that as it could create a race-condition.
+        build_and_schedule_shutdown_jobs
+
+        wait_with_poll
+
        shutdown

        # This is on purpose - see the notes for this method
@@ -98,55 +130,88 @@ module Karafka

       # Resumes processing of partitions that were paused due to an error.
       def resume_paused_partitions
-        @pauses_manager.resume { |topic, partition| @client.resume(topic, partition) }
+        @pauses_manager.resume do |topic, partition|
+          @client.resume(topic, partition)
+        end
       end

       # Enqueues revoking jobs for partitions that were taken away from the running process.
-      # @return [Boolean] was there anything to revoke
-      def distribute_revoke_lost_partitions_jobs
+      def build_and_schedule_revoke_lost_partitions_jobs
         revoked_partitions = @client.rebalance_manager.revoked_partitions

-        return false if revoked_partitions.empty?
+        # Stop early to save on some execution and array allocation
+        return if revoked_partitions.empty?
+
+        jobs = []

         revoked_partitions.each do |topic, partitions|
           partitions.each do |partition|
-            pause = @pauses_manager.fetch(topic, partition)
-            executor = @executors.fetch(topic, partition, pause)
-            @jobs_queue << Processing::Jobs::Revoked.new(executor)
+            pause_tracker = @pauses_manager.fetch(topic, partition)
+            executor = @executors.fetch(topic, partition, pause_tracker)
+            jobs << Processing::Jobs::Revoked.new(executor)
           end
         end

-        true
+        @scheduler.schedule_revocation(@jobs_queue, jobs)
       end

-      # Takes the messages per topic partition and enqueues processing jobs in threads.
+      # Enqueues the shutdown jobs for all the executors that exist in our subscription group
+      def build_and_schedule_shutdown_jobs
+        jobs = []
+
+        @executors.each do |_, _, executor|
+          jobs << Processing::Jobs::Shutdown.new(executor)
+        end
+
+        @scheduler.schedule_shutdown(@jobs_queue, jobs)
+      end
+
+      # Polls messages within the time and amount boundaries defined in the settings and then
+      # builds karafka messages based on the raw rdkafka messages buffer returned by the
+      # `#batch_poll` method.
       #
-      # @param messages_buffer [Karafka::Connection::MessagesBuffer] buffer with messages
-      def distribute_partitions_jobs(messages_buffer)
-        messages_buffer.each do |topic, partition, messages|
+      # @note There are two buffers, one for raw messages and one for "built" karafka messages
+      def poll_and_remap_messages
+        @messages_buffer.remap(
+          @client.batch_poll
+        )
+      end
+
+      # Takes the messages per topic partition and enqueues processing jobs in threads using
+      # the given scheduler.
+      def build_and_schedule_consumption_jobs
+        return if @messages_buffer.empty?
+
+        jobs = []
+
+        @messages_buffer.each do |topic, partition, messages|
           pause = @pauses_manager.fetch(topic, partition)

           next if pause.paused?

           executor = @executors.fetch(topic, partition, pause)

-          @jobs_queue << Processing::Jobs::Consume.new(executor, messages)
+          jobs << Processing::Jobs::Consume.new(executor, messages)
         end
+
+        @scheduler.schedule_consumption(@jobs_queue, jobs)
       end

       # Waits for all the jobs from a given subscription group to finish before moving forward
-      # @param subscription_group [Karafka::Routing::SubscriptionGroup]
-      def wait(subscription_group)
-        @jobs_queue.wait(subscription_group.id)
+      def wait
+        @jobs_queue.wait(@subscription_group.id)
+      end
+
+      # Waits without blocking the polling
+      # This should be used only when we no longer plan to use any incoming data and we can
+      # safely discard it
+      def wait_with_poll
+        @client.batch_poll until @jobs_queue.empty?(@subscription_group.id)
       end

       # Stops the jobs queue, triggers shutdown on all the executors (sync), commits offsets and
       # stops kafka client.
       def shutdown
-        @jobs_queue.close
-        # This runs synchronously, making sure we finish all the shutdowns before we stop the
-        # client.
-        @executors.shutdown
         @client.commit_offsets!
         @client.stop
       end
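
Seen end to end, the loop now follows a fixed order. The sketch below is a condensed illustration, not the library code: the enclosing loop, instrumentation and error recovery are omitted, and the method names mirror the diff above.

    # Illustrative only: one iteration of the reworked fetch loop
    def fetch_loop_iteration
      resume_paused_partitions
      poll_and_remap_messages                        # fills @messages_buffer
      build_and_schedule_revoke_lost_partitions_jobs
      wait                                           # revocation jobs must finish first
      build_and_schedule_consumption_jobs
      wait                                           # then the consumption jobs
      @client.commit_offsets
    end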
data/lib/karafka/connection/listeners_batch.rb

@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Abstraction layer around listeners batch.
+    class ListenersBatch
+      include Enumerable
+
+      # @param jobs_queue [JobsQueue]
+      # @return [ListenersBatch]
+      def initialize(jobs_queue)
+        @batch = App.subscription_groups.map do |subscription_group|
+          Connection::Listener.new(subscription_group, jobs_queue)
+        end
+      end
+
+      # Iterates over available listeners and yields each listener
+      # @param block [Proc] block we want to run
+      def each(&block)
+        @batch.each(&block)
+      end
+    end
+  end
+end
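
Because `ListenersBatch` is `Enumerable` and `Listener` now includes `Helpers::Async`, starting all listeners becomes a plain iteration. A hypothetical sketch; the actual wiring happens in the reworked `Karafka::Runner`, which changed in this release but is not shown in this excerpt.

    jobs_queue = Karafka::Processing::JobsQueue.new
    listeners = Karafka::Connection::ListenersBatch.new(jobs_queue)

    # Each listener runs its fetch loop in its own background thread
    listeners.each(&:async_call)
    # Join is delegated to each listener's underlying thread
    listeners.each(&:join)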
data/lib/karafka/connection/messages_buffer.rb

@@ -2,16 +2,26 @@

 module Karafka
   module Connection
-    # Buffer for messages.
-    # When message is added to this buffer, it gets assigned to an array with other messages from
-    # the same topic and partition.
+    # Buffer used to build and store karafka messages built based on raw librdkafka messages.
     #
-    # @note This buffer is NOT threadsafe.
+    # Why do we have two buffers? `RawMessagesBuffer` is used to store raw messages and to handle
+    # cases related to partition revocation and reconnections. It is "internal" to the listening
+    # process. `MessagesBuffer` on the other hand is used to "translate" those raw messages that
+    # we know are ok into Karafka messages and to simplify further work with them.
+    #
+    # While it adds a bit of overhead, it makes conceptual things much easier and it adds only
+    # two simple hash iterations over messages batch.
+    #
+    # @note This buffer is NOT thread safe. We do not worry about it as we do not use it outside
+    #   of the main listener loop. It can be cleared after the jobs are scheduled with messages
+    #   it stores, because messages arrays are not "cleared" in any way directly and their
+    #   reference stays.
     class MessagesBuffer
       attr_reader :size

-      # @return [Karafka::Connection::MessagesBuffer] buffer instance
-      def initialize
+      # @param subscription_group [Karafka::Routing::SubscriptionGroup]
+      def initialize(subscription_group)
+        @subscription_group = subscription_group
         @size = 0
         @groups = Hash.new do |topic_groups, topic|
           topic_groups[topic] = Hash.new do |partition_groups, partition|
@@ -20,11 +30,35 @@ module Karafka
         end
       end

-      # Iterates over aggregated data providing messages per topic partition.
+      # Remaps raw messages from the raw messages buffer to Karafka messages
+      # @param raw_messages_buffer [RawMessagesBuffer] buffer with raw messages
+      def remap(raw_messages_buffer)
+        clear unless @size.zero?
+
+        # Since it happens "right after" we've received the messages, it is close enough in time
+        # to be used as the moment we received messages.
+        received_at = Time.now
+
+        raw_messages_buffer.each do |topic, partition, messages|
+          @size += messages.count
+
+          ktopic = @subscription_group.topics.find(topic)
+
+          @groups[topic][partition] = messages.map do |message|
+            Messages::Builders::Message.call(
+              message,
+              ktopic,
+              received_at
+            )
+          end
+        end
+      end
+
+      # Allows to iterate over all the topics and partitions messages
       #
       # @yieldparam [String] topic name
       # @yieldparam [Integer] partition number
-      # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
+      # @yieldparam [Array<Karafka::Messages::Message>] messages from a given topic partition
       def each
         @groups.each do |topic, partitions|
           partitions.each do |partition, messages|
@@ -33,24 +67,17 @@ module Karafka
           end
         end
       end

-      # Adds a message to the buffer.
-      #
-      # @param message [Rdkafka::Consumer::Message] raw rdkafka message
-      # @return [Array<Rdkafka::Consumer::Message>] given partition topic sub-buffer array
-      def <<(message)
-        @size += 1
-        @groups[message.topic][message.partition] << message
+      # @return [Boolean] is the buffer empty or does it contain any messages
+      def empty?
+        @size.zero?
       end

-      # Removes all the data from the buffer.
-      #
-      # @note We do not clear the whole groups hash but rather we clear the partition hashes, so
-      #   we save ourselves some objects allocations. We cannot clear the underlying arrays as
-      #   they may be used in other threads for data processing, thus if we would clear it, we
-      #   could potentially clear a raw messages array for a job that is in the jobs queue.
+      private
+
+      # Clears the buffer completely
       def clear
         @size = 0
-        @groups.each_value(&:clear)
+        @groups.clear
       end
     end
   end
 end
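
To make the two-buffer handoff concrete, here is a rough usage sketch; `subscription_group` (a routing subscription group) and `polled_messages` (raw rdkafka messages from a poll) are assumed to exist.

    raw_buffer = Karafka::Connection::RawMessagesBuffer.new
    polled_messages.each { |message| raw_buffer << message } # hypothetical poll results

    messages_buffer = Karafka::Connection::MessagesBuffer.new(subscription_group)
    messages_buffer.remap(raw_buffer) # one pass: raw -> Karafka::Messages::Message

    messages_buffer.each do |topic, partition, messages|
      puts "#{topic}/#{partition}: #{messages.count} built messages"
    end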
data/lib/karafka/connection/pauses_manager.rb

@@ -12,11 +12,11 @@ module Karafka
       end
     end

-    # Creates or fetches pause of a given topic partition.
+    # Creates or fetches pause tracker of a given topic partition.
     #
     # @param topic [String] topic name
     # @param partition [Integer] partition number
-    # @return [Karafka::TimeTrackers::Pause] pause instance
+    # @return [Karafka::TimeTrackers::Pause] pause tracker instance
     def fetch(topic, partition)
       @pauses[topic][partition] ||= TimeTrackers::Pause.new(
         timeout: Karafka::App.config.pause_timeout,
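
Because `fetch` memoizes per topic partition, repeated lookups return the same tracker. A tiny sketch; the topic name and partition below are hypothetical.

    pauses_manager = Karafka::Connection::PausesManager.new
    tracker = pauses_manager.fetch('events', 0)
    # Memoized: the same topic partition always yields the same tracker object
    tracker.equal?(pauses_manager.fetch('events', 0)) # => true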
data/lib/karafka/connection/raw_messages_buffer.rb

@@ -0,0 +1,101 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Buffer for raw librdkafka messages.
+    #
+    # When message is added to this buffer, it gets assigned to an array with other messages from
+    # the same topic and partition.
+    #
+    # @note This buffer is NOT threadsafe.
+    #
+    # @note We store data here in groups per topic partition to handle the revocation case, where
+    #   we may need to remove messages from a single topic partition.
+    class RawMessagesBuffer
+      attr_reader :size
+
+      # @return [Karafka::Connection::RawMessagesBuffer] buffer instance
+      def initialize
+        @size = 0
+        @groups = Hash.new do |topic_groups, topic|
+          topic_groups[topic] = Hash.new do |partition_groups, partition|
+            partition_groups[partition] = []
+          end
+        end
+      end
+
+      # Adds a message to the buffer.
+      #
+      # @param message [Rdkafka::Consumer::Message] raw rdkafka message
+      # @return [Array<Rdkafka::Consumer::Message>] given partition topic sub-buffer array
+      def <<(message)
+        @size += 1
+        @groups[message.topic][message.partition] << message
+      end
+
+      # Allows to iterate over all the topics and partitions messages
+      #
+      # @yieldparam [String] topic name
+      # @yieldparam [Integer] partition number
+      # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
+      def each
+        @groups.each do |topic, partitions|
+          partitions.each do |partition, messages|
+            yield(topic, partition, messages)
+          end
+        end
+      end
+
+      # Removes given topic and partition data out of the buffer
+      # This is used when there's a partition revocation
+      # @param topic [String] topic we're interested in
+      # @param partition [Integer] partition of which data we want to remove
+      def delete(topic, partition)
+        return unless @groups.key?(topic)
+        return unless @groups.fetch(topic).key?(partition)
+
+        topic_data = @groups.fetch(topic)
+        topic_data.delete(partition)
+
+        recount!
+
+        # If there are no more partitions to handle in a given topic, remove it completely
+        @groups.delete(topic) if topic_data.empty?
+      end
+
+      # Removes duplicated messages from the same partitions
+      # This should be used only when a rebalance occurs, as we may get data we already have,
+      # due to the processing from the last offset. In cases like this we may get the same data
+      # again and we want to ensure as few duplications as possible
+      def uniq!
+        @groups.each_value do |partitions|
+          partitions.each_value do |messages|
+            messages.uniq!(&:offset)
+          end
+        end
+
+        recount!
+      end
+
+      # Removes all the data from the buffer.
+      #
+      # @note We do not clear the whole groups hash but rather we clear the partition hashes, so
+      #   we save ourselves some objects allocations. We cannot clear the underlying arrays as
+      #   they may be used in other threads for data processing, thus if we would clear it, we
+      #   could potentially clear a raw messages array for a job that is in the jobs queue.
+      def clear
+        @size = 0
+        @groups.each_value(&:clear)
+      end
+
+      private
+
+      # Updates the messages count if we performed any operations that could change the state
+      def recount!
+        @size = @groups.each_value.sum do |partitions|
+          partitions.each_value.map(&:count).sum
+        end
+      end
+    end
+  end
+end
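
A short sketch of how the revocation-related helpers behave; `polled_messages`, the topic name and the partition number are hypothetical.

    buffer = Karafka::Connection::RawMessagesBuffer.new
    polled_messages.each { |message| buffer << message } # hypothetical poll results

    # A revoked partition's data can be dropped in isolation...
    buffer.delete('events', 2)
    # ...and after a rebalance, re-fetched duplicates (same offsets) removed
    buffer.uniq!

    buffer.size # kept accurate via the internal recount!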
data/lib/karafka/connection/rebalance_manager.rb

@@ -9,35 +9,50 @@ module Karafka
     #
     # @note Since this does not happen really often, we try to stick with same objects for the
     #   empty states most of the time, so we don't create many objects during the manager life
+    #
+    # @note Internally in the rebalance manager we have a notion of lost partitions. Partitions
+    #   that are lost are those that got revoked but did not get re-assigned back. We do not
+    #   expose this concept outside and we normalize to have them revoked, as it is irrelevant
+    #   from the rest of the code's perspective: only those that are lost are truly revoked.
     class RebalanceManager
+      # Empty array for internal usage not to create new objects
+      EMPTY_ARRAY = [].freeze
+
+      private_constant :EMPTY_ARRAY
+
       # @return [RebalanceManager]
       def initialize
-        @assigned = {}
-        @revoked = {}
+        @assigned_partitions = {}
+        @revoked_partitions = {}
+        @lost_partitions = {}
       end

-      # @return [Hash<String, Array<Integer>>] hash where the keys are the names of topics for
-      #   which we've got new partitions assigned and array with ids of the partitions as the
-      #   value
-      # @note Once assigned partitions are fetched, the state will be reset since the callbacks
-      #   for new assigned partitions are set only during a state change
-      def assigned_partitions
-        return @assigned if @assigned.empty?
-
-        result = @assigned.dup
-        @assigned.clear
-        result
+      # Resets the rebalance manager state
+      # This needs to be done before each polling loop as during the polling, the state may be
+      # changed
+      def clear
+        @assigned_partitions.clear
+        @revoked_partitions.clear
+        @lost_partitions.clear
       end

       # @return [Hash<String, Array<Integer>>] hash where the keys are the names of topics for
       #   which we've lost partitions and array with ids of the partitions as the value
-      # @note Once revoked partitions are fetched, the state will be reset since the callbacks
-      #   for new revoked partitions are set only during a state change
+      # @note We do not consider topics and partitions that got revoked and re-assigned as lost
       def revoked_partitions
-        return @revoked if @revoked.empty?
+        return @revoked_partitions if @revoked_partitions.empty?
+        return @lost_partitions unless @lost_partitions.empty?
+
+        @revoked_partitions.each do |topic, partitions|
+          @lost_partitions[topic] = partitions - @assigned_partitions.fetch(topic, EMPTY_ARRAY)
+        end
+
+        @lost_partitions
+      end

-        result = @revoked.dup
-        @revoked.clear
-        result
+      # @return [Boolean] true if any partitions were revoked
+      def revoked_partitions?
+        !revoked_partitions.empty?
       end

       # Callback that kicks in inside of rdkafka, when new partitions are assigned.
@@ -46,7 +61,7 @@ module Karafka
       # @param _ [Rdkafka::Consumer]
       # @param partitions [Rdkafka::Consumer::TopicPartitionList]
       def on_partitions_assigned(_, partitions)
-        @assigned = partitions.to_h.transform_values { |part| part.map(&:partition) }
+        @assigned_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
       end

       # Callback that kicks in inside of rdkafka, when partitions are revoked.
@@ -55,7 +70,7 @@ module Karafka
       # @param _ [Rdkafka::Consumer]
       # @param partitions [Rdkafka::Consumer::TopicPartitionList]
       def on_partitions_revoked(_, partitions)
-        @revoked = partitions.to_h.transform_values { |part| part.map(&:partition) }
+        @revoked_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
       end
     end
   end
 end
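
To make the lost-partition normalization concrete, here is a worked example with hypothetical rebalance data; it mirrors the subtraction inside `#revoked_partitions`.

    # Partition 0 of 'events' was revoked and immediately re-assigned,
    # partition 1 was revoked and did not come back
    revoked  = { 'events' => [0, 1] }
    assigned = { 'events' => [0] }

    lost = revoked.to_h do |topic, partitions|
      [topic, partitions - assigned.fetch(topic, [])]
    end
    # => { 'events' => [1] } so only the truly lost partition counts as revoked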
data/lib/karafka/contracts/config.rb

@@ -25,6 +25,7 @@ module Karafka
         required(:pause_max_timeout) { int? & gt?(0) }
         required(:pause_with_exponential_backoff).filled(:bool?)
         required(:shutdown_timeout) { int? & gt?(0) }
+        required(:max_wait_time) { int? & gt?(0) }
         required(:kafka).filled(:hash)

         # We validate internals just to be sure, that they are present and working
@@ -32,6 +33,7 @@ module Karafka
           required(:routing_builder)
           required(:status)
           required(:process)
+          required(:scheduler)
           required(:subscription_groups_builder)
         end
       end
@@ -52,6 +54,12 @@ module Karafka
           key(:pause_timeout).failure(:max_timeout_vs_pause_max_timeout)
         end
       end
+
+      rule(:shutdown_timeout, :max_wait_time) do
+        if values[:max_wait_time].to_i >= values[:shutdown_timeout].to_i
+          key(:shutdown_timeout).failure(:shutdown_timeout_vs_max_wait_time)
+        end
+      end
     end
   end
 end
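
The new rule exists because during shutdown the listener keeps polling (`wait_with_poll` above) and a single poll may block for up to `max_wait_time`; if that could reach `shutdown_timeout`, a forceful shutdown might fire before a graceful one completes. Hypothetical values that satisfy the contract (both settings in milliseconds):

    class KarafkaApp < Karafka::App
      setup do |config|
        config.max_wait_time = 1_000      # a single poll waits at most 1 s
        config.shutdown_timeout = 60_000  # must be strictly greater than max_wait_time
      end
    end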
data/lib/karafka/helpers/async.rb

@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Helpers
+    # Allows a given class to run async in a separate thread. Also provides a few methods we may
+    # want to use to control the underlying thread
+    #
+    # @note Thread running code needs to manage its own exceptions. If they leak out, they will
+    #   abort the thread on exception.
+    module Async
+      class << self
+        # Adds forwardable to redirect thread-based control methods to the underlying thread that
+        # runs the async operations
+        #
+        # @param base [Class] class we're including this module in
+        def included(base)
+          base.extend ::Forwardable
+
+          base.def_delegators :@thread, :join, :terminate, :alive?
+        end
+      end
+
+      # Runs the `#call` method in a new thread
+      def async_call
+        @thread = Thread.new do
+          Thread.current.abort_on_exception = true
+
+          call
+        end
+      end
+    end
+  end
+end
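
A minimal usage sketch of the new helper with a hypothetical class; the only requirement is that the including class responds to `#call`.

    class Ticker
      include Karafka::Helpers::Async

      # This is what will run inside the spawned thread
      def call
        3.times { puts 'tick' }
      end
    end

    ticker = Ticker.new
    ticker.async_call # spawns the thread and returns immediately
    ticker.join       # delegated to the underlying thread
    ticker.alive?     # => false once the thread has finished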
data/lib/karafka/instrumentation/monitor.rb

@@ -22,7 +22,8 @@ module Karafka
         app.stopping
         app.stopped

-        consumer.consume
+        consumer.prepared
+        consumer.consumed
         consumer.revoked
         consumer.shutdown

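Subscribers can hook into the renamed events as before; the payload contents below are an assumption, not something this diff confirms.

    Karafka.monitor.subscribe('consumer.consumed') do |event|
      # Assumed payload: the instrumenting object is passed as caller
      puts "Batch consumed by #{event[:caller].class}"
    end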
data/lib/karafka/messages/batch_metadata.rb

@@ -13,10 +13,33 @@ module Karafka
       :deserializer,
       :partition,
       :topic,
+      :created_at,
       :scheduled_at,
-      :consumption_lag,
-      :processing_lag,
+      :processed_at,
       keyword_init: true
-    )
+    ) do
+      # This lag describes how long it took for a message to be consumed from the moment it was
+      # created
+      def consumption_lag
+        time_distance_in_ms(processed_at, created_at)
+      end
+
+      # This lag describes how long a batch had to wait before it was picked up by one of the
+      # workers
+      def processing_lag
+        time_distance_in_ms(processed_at, scheduled_at)
+      end
+
+      private
+
+      # Computes time distance in between two times in ms
+      #
+      # @param time1 [Time]
+      # @param time2 [Time]
+      # @return [Integer] distance in between two times in ms
+      def time_distance_in_ms(time1, time2)
+        ((time1 - time2) * 1_000).round
+      end
+    end
   end
 end
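
A worked example of the two lags with hypothetical timestamps, using the same arithmetic as `time_distance_in_ms`:

    created_at   = Time.at(1_650_000_000.000) # message produced
    scheduled_at = Time.at(1_650_000_000.250) # consumption job enqueued
    processed_at = Time.at(1_650_000_000.400) # worker picked the batch up

    ((processed_at - created_at) * 1_000).round   # consumption_lag => 400 (ms)
    ((processed_at - scheduled_at) * 1_000).round # processing_lag  => 150 (ms)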