karafka 2.0.0.alpha5 → 2.0.0.beta2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.ruby-version +1 -1
  4. data/CHANGELOG.md +35 -2
  5. data/Gemfile.lock +6 -6
  6. data/bin/integrations +55 -43
  7. data/config/errors.yml +1 -0
  8. data/docker-compose.yml +4 -1
  9. data/lib/active_job/karafka.rb +2 -2
  10. data/lib/karafka/active_job/routing/extensions.rb +21 -0
  11. data/lib/karafka/base_consumer.rb +65 -12
  12. data/lib/karafka/connection/client.rb +36 -6
  13. data/lib/karafka/connection/listener.rb +92 -27
  14. data/lib/karafka/connection/listeners_batch.rb +24 -0
  15. data/lib/karafka/connection/messages_buffer.rb +49 -22
  16. data/lib/karafka/connection/pauses_manager.rb +2 -2
  17. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  18. data/lib/karafka/connection/rebalance_manager.rb +35 -20
  19. data/lib/karafka/contracts/config.rb +8 -0
  20. data/lib/karafka/helpers/async.rb +33 -0
  21. data/lib/karafka/instrumentation/monitor.rb +2 -1
  22. data/lib/karafka/messages/batch_metadata.rb +26 -3
  23. data/lib/karafka/messages/builders/batch_metadata.rb +17 -29
  24. data/lib/karafka/messages/builders/message.rb +1 -0
  25. data/lib/karafka/messages/builders/messages.rb +4 -12
  26. data/lib/karafka/pro/active_job/consumer.rb +21 -0
  27. data/lib/karafka/pro/active_job/dispatcher.rb +10 -10
  28. data/lib/karafka/pro/active_job/job_options_contract.rb +9 -9
  29. data/lib/karafka/pro/loader.rb +17 -8
  30. data/lib/karafka/pro/performance_tracker.rb +80 -0
  31. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +38 -0
  32. data/lib/karafka/pro/scheduler.rb +54 -0
  33. data/lib/karafka/processing/executor.rb +19 -11
  34. data/lib/karafka/processing/executors_buffer.rb +15 -7
  35. data/lib/karafka/processing/jobs/base.rb +28 -0
  36. data/lib/karafka/processing/jobs/consume.rb +11 -4
  37. data/lib/karafka/processing/jobs_queue.rb +28 -16
  38. data/lib/karafka/processing/worker.rb +30 -9
  39. data/lib/karafka/processing/workers_batch.rb +5 -0
  40. data/lib/karafka/railtie.rb +12 -0
  41. data/lib/karafka/routing/consumer_group.rb +1 -1
  42. data/lib/karafka/routing/subscription_group.rb +1 -1
  43. data/lib/karafka/routing/subscription_groups_builder.rb +3 -2
  44. data/lib/karafka/routing/topics.rb +38 -0
  45. data/lib/karafka/runner.rb +19 -27
  46. data/lib/karafka/scheduler.rb +20 -0
  47. data/lib/karafka/server.rb +24 -23
  48. data/lib/karafka/setup/config.rb +4 -1
  49. data/lib/karafka/time_trackers/pause.rb +10 -2
  50. data/lib/karafka/version.rb +1 -1
  51. data.tar.gz.sig +0 -0
  52. metadata +13 -4
  53. metadata.gz.sig +0 -0
  54. data/lib/karafka/active_job/routing_extensions.rb +0 -18
data/lib/karafka/connection/listener.rb
@@ -3,9 +3,13 @@
 module Karafka
   module Connection
     # A single listener that listens to incoming messages from a single subscription group.
-    # It polls the messages and then enqueues. It also takes care of potential recovery from
+    # It polls the messages and then enqueues jobs. It also takes care of potential recovery from
     # critical errors by restarting everything in a safe manner.
+    #
+    # This is the heart of the consumption process.
     class Listener
+      include Helpers::Async
+
       # @param subscription_group [Karafka::Routing::SubscriptionGroup]
       # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
       # @return [Karafka::Connection::Listener] listener instance
@@ -15,6 +19,12 @@ module Karafka
         @pauses_manager = PausesManager.new
         @client = Client.new(@subscription_group)
         @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
+        # We reference the scheduler here as it is much faster than fetching it each time
+        @scheduler = ::Karafka::App.config.internal.scheduler
+        # We keep one buffer for messages to preserve memory and not allocate extra objects
+        # We can do it this way because we always first schedule jobs using messages before we
+        # fetch another batch.
+        @messages_buffer = MessagesBuffer.new(subscription_group)
       end

       # Runs the main listener fetch loop.
@@ -51,33 +61,55 @@ module Karafka
         )

         resume_paused_partitions
+
         # We need to fetch data before we revoke lost partitions details as during the polling
         # the callbacks for tracking lost partitions are triggered. Otherwise we would always be
         # one batch behind.
-        messages_buffer = @client.batch_poll
+        poll_and_remap_messages

         Karafka.monitor.instrument(
           'connection.listener.fetch_loop.received',
           caller: self,
-          messages_buffer: messages_buffer
+          messages_buffer: @messages_buffer
         )

         # If there were revoked partitions, we need to wait on their jobs to finish before
         # distributing consuming jobs as upon revoking, we might get assigned to the same
         # partitions, thus getting their jobs. The revoking jobs need to finish before
         # appropriate consumers are taken down and re-created
-        wait(@subscription_group) if distribute_revoke_lost_partitions_jobs
-
-        distribute_partitions_jobs(messages_buffer)
+        build_and_schedule_revoke_lost_partitions_jobs

         # We wait only on jobs from our subscription group. Other groups are independent.
-        wait(@subscription_group)
+        wait
+
+        build_and_schedule_consumption_jobs
+
+        wait

         # We don't use the `#commit_offsets!` here for performance reasons. This can be achieved
         # if needed by using manual offset management.
         @client.commit_offsets
       end

+      # If we are stopping we will no longer schedule any jobs despite polling.
+      # We need to keep polling not to exceed the `max.poll.interval` for long-running
+      # non-blocking jobs and we need to allow them to finish. We however do not want to
+      # enqueue any new jobs. It's worth keeping in mind that it is the end user's responsibility
+      # to detect shutdown in their long-running logic or else Karafka will force shutdown
+      # after a while.
+      #
+      # We do not care about resuming any partitions or lost jobs as we do not plan to do
+      # anything with them as we're in the shutdown phase.
+      wait_with_poll
+
+      # We do not want to schedule the shutdown jobs prior to finishing all the jobs
+      # (including non-blocking) as there might be a long-running job with a shutdown and then
+      # we would run two jobs in parallel for the same executor and consumer. We do not want that
+      # as it could create a race-condition.
+      build_and_schedule_shutdown_jobs
+
+      wait_with_poll
+
       shutdown

       # This is on purpose - see the notes for this method
@@ -98,55 +130,88 @@ module Karafka

       # Resumes processing of partitions that were paused due to an error.
       def resume_paused_partitions
-        @pauses_manager.resume { |topic, partition| @client.resume(topic, partition) }
+        @pauses_manager.resume do |topic, partition|
+          @client.resume(topic, partition)
+        end
       end

       # Enqueues revoking jobs for partitions that were taken away from the running process.
-      # @return [Boolean] was there anything to revoke
-      def distribute_revoke_lost_partitions_jobs
+      def build_and_schedule_revoke_lost_partitions_jobs
         revoked_partitions = @client.rebalance_manager.revoked_partitions

-        return false if revoked_partitions.empty?
+        # Stop early to save on some execution and array allocation
+        return if revoked_partitions.empty?
+
+        jobs = []

         revoked_partitions.each do |topic, partitions|
           partitions.each do |partition|
-            pause = @pauses_manager.fetch(topic, partition)
-            executor = @executors.fetch(topic, partition, pause)
-            @jobs_queue << Processing::Jobs::Revoked.new(executor)
+            pause_tracker = @pauses_manager.fetch(topic, partition)
+            executor = @executors.fetch(topic, partition, pause_tracker)
+            jobs << Processing::Jobs::Revoked.new(executor)
           end
         end

-        true
+        @scheduler.schedule_revocation(@jobs_queue, jobs)
       end

-      # Takes the messages per topic partition and enqueues processing jobs in threads.
+      # Enqueues the shutdown jobs for all the executors that exist in our subscription group
+      def build_and_schedule_shutdown_jobs
+        jobs = []
+
+        @executors.each do |_, _, executor|
+          jobs << Processing::Jobs::Shutdown.new(executor)
+        end
+
+        @scheduler.schedule_shutdown(@jobs_queue, jobs)
+      end
+
+      # Polls messages within the time and amount boundaries defined in the settings and then
+      # builds karafka messages based on the raw rdkafka messages buffer returned by the
+      # `#batch_poll` method.
       #
-      # @param messages_buffer [Karafka::Connection::MessagesBuffer] buffer with messages
-      def distribute_partitions_jobs(messages_buffer)
-        messages_buffer.each do |topic, partition, messages|
+      # @note There are two buffers, one for raw messages and one for "built" karafka messages
+      def poll_and_remap_messages
+        @messages_buffer.remap(
+          @client.batch_poll
+        )
+      end
+
+      # Takes the messages per topic partition and enqueues processing jobs in threads using
+      # given scheduler.
+      def build_and_schedule_consumption_jobs
+        return if @messages_buffer.empty?
+
+        jobs = []
+
+        @messages_buffer.each do |topic, partition, messages|
           pause = @pauses_manager.fetch(topic, partition)

           next if pause.paused?

           executor = @executors.fetch(topic, partition, pause)

-          @jobs_queue << Processing::Jobs::Consume.new(executor, messages)
+          jobs << Processing::Jobs::Consume.new(executor, messages)
         end
+
+        @scheduler.schedule_consumption(@jobs_queue, jobs)
       end

       # Waits for all the jobs from a given subscription group to finish before moving forward
-      # @param subscription_group [Karafka::Routing::SubscriptionGroup]
-      def wait(subscription_group)
-        @jobs_queue.wait(subscription_group.id)
+      def wait
+        @jobs_queue.wait(@subscription_group.id)
+      end
+
+      # Waits without blocking the polling
+      # This should be used only when we no longer plan to use any incoming data and we can safely
+      # discard it
+      def wait_with_poll
+        @client.batch_poll until @jobs_queue.empty?(@subscription_group.id)
       end

       # Stops the jobs queue, triggers shutdown on all the executors (sync), commits offsets and
       # stops kafka client.
       def shutdown
-        @jobs_queue.close
-        # This runs synchronously, making sure we finish all the shutdowns before we stop the
-        # client.
-        @executors.shutdown
         @client.commit_offsets!
         @client.stop
       end
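
Both the revocation and the consumption paths now funnel through `@scheduler.schedule_*` calls instead of pushing straight onto the jobs queue. A minimal sketch of the contract those call sites imply, assuming a plain FIFO policy; the shipped default is the new data/lib/karafka/scheduler.rb (+20 lines in the file list, not shown in this excerpt), and the Pro performance-tracking scheduler can replace it:

# Hypothetical FIFO scheduler satisfying the interface the listener uses above.
# The method names match the call sites; the bodies here are an assumption.
class FifoScheduler
  # @param queue [Karafka::Processing::JobsQueue]
  # @param jobs [Array<Karafka::Processing::Jobs::Base>] jobs to enqueue in order
  def schedule_consumption(queue, jobs)
    jobs.each { |job| queue << job }
  end

  # In this sketch revocation and shutdown jobs are distributed the same FIFO way.
  alias schedule_revocation schedule_consumption
  alias schedule_shutdown schedule_consumption
end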
data/lib/karafka/connection/listeners_batch.rb
@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Abstraction layer around listeners batch.
+    class ListenersBatch
+      include Enumerable
+
+      # @param jobs_queue [JobsQueue]
+      # @return [ListenersBatch]
+      def initialize(jobs_queue)
+        @batch = App.subscription_groups.map do |subscription_group|
+          Connection::Listener.new(subscription_group, jobs_queue)
+        end
+      end
+
+      # Iterates over available listeners and yields each listener
+      # @param block [Proc] block we want to run
+      def each(&block)
+        @batch.each(&block)
+      end
+    end
+  end
+end
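
Combined with the `Helpers::Async` mixin added further down in this diff, the batch lets the process run one fetch loop thread per subscription group. A hedged illustration of how the two pieces compose; the actual wiring lives in data/lib/karafka/runner.rb, whose diff is not shown here:

# Illustration only - assumes `jobs_queue` was built earlier during boot.
listeners = Karafka::Connection::ListenersBatch.new(jobs_queue)

# Start each listener's #call fetch loop in its own thread (via Helpers::Async)...
listeners.each(&:async_call)

# ...then block until all the listener threads wind down.
listeners.each(&:join)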
data/lib/karafka/connection/messages_buffer.rb
@@ -2,16 +2,26 @@

 module Karafka
   module Connection
-    # Buffer for messages.
-    # When message is added to this buffer, it gets assigned to an array with other messages from
-    # the same topic and partition.
+    # Buffer used to build and store karafka messages built based on raw librdkafka messages.
     #
-    # @note This buffer is NOT threadsafe.
+    # Why do we have two buffers? `RawMessagesBuffer` is used to store raw messages and to handle
+    # cases related to partition revocation and reconnections. It is "internal" to the listening
+    # process. `MessagesBuffer` on the other hand is used to "translate" those raw messages that
+    # we know are ok into Karafka messages and to simplify further work with them.
+    #
+    # While it adds a bit of overhead, it makes conceptual things much easier and it adds only two
+    # simple hash iterations over messages batch.
+    #
+    # @note This buffer is NOT thread safe. We do not worry about it as we do not use it outside
+    #   of the main listener loop. It can be cleared after the jobs are scheduled with messages
+    #   it stores, because messages arrays are not "cleared" in any way directly and their
+    #   reference stays.
     class MessagesBuffer
       attr_reader :size

-      # @return [Karafka::Connection::MessagesBuffer] buffer instance
-      def initialize
+      # @param subscription_group [Karafka::Routing::SubscriptionGroup]
+      def initialize(subscription_group)
+        @subscription_group = subscription_group
         @size = 0
         @groups = Hash.new do |topic_groups, topic|
           topic_groups[topic] = Hash.new do |partition_groups, partition|
@@ -20,11 +30,35 @@ module Karafka
         end
       end

-      # Iterates over aggregated data providing messages per topic partition.
+      # Remaps raw messages from the raw messages buffer to Karafka messages
+      # @param raw_messages_buffer [RawMessagesBuffer] buffer with raw messages
+      def remap(raw_messages_buffer)
+        clear unless @size.zero?
+
+        # Since it happens "right after" we've received the messages, it is close enough in time
+        # to be used as the moment we received messages.
+        received_at = Time.now
+
+        raw_messages_buffer.each do |topic, partition, messages|
+          @size += messages.count
+
+          ktopic = @subscription_group.topics.find(topic)
+
+          @groups[topic][partition] = messages.map do |message|
+            Messages::Builders::Message.call(
+              message,
+              ktopic,
+              received_at
+            )
+          end
+        end
+      end
+
+      # Allows iterating over messages from all the topics and partitions
       #
       # @yieldparam [String] topic name
       # @yieldparam [Integer] partition number
-      # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
+      # @yieldparam [Array<Karafka::Messages::Message>] messages from a given topic partition
      def each
        @groups.each do |topic, partitions|
          partitions.each do |partition, messages|
@@ -33,24 +67,17 @@ module Karafka
           end
         end
       end

-      # Adds a message to the buffer.
-      #
-      # @param message [Rdkafka::Consumer::Message] raw rdkafka message
-      # @return [Array<Rdkafka::Consumer::Message>] given partition topic sub-buffer array
-      def <<(message)
-        @size += 1
-        @groups[message.topic][message.partition] << message
+      # @return [Boolean] is the buffer empty or does it contain any messages
+      def empty?
+        @size.zero?
       end

-      # Removes all the data from the buffer.
-      #
-      # @note We do not clear the whole groups hash but rather we clear the partition hashes, so
-      #   we save ourselves some objects allocations. We cannot clear the underlying arrays as they
-      #   may be used in other threads for data processing, thus if we would clear it, we could
-      #   potentially clear a raw messages array for a job that is in the jobs queue.
+      private
+
+      # Clears the buffer completely
       def clear
         @size = 0
-        @groups.each_value(&:clear)
+        @groups.clear
       end
     end
   end
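
The thread-safety note above hinges on a plain-Ruby aliasing fact: `#remap` assigns a brand-new array per topic partition each cycle, so clearing `@groups` cannot touch arrays that scheduled jobs already hold. A tiny standalone illustration (not Karafka API):

# A job keeps a reference to the built messages array, so dropping the hash
# entry leaves that array intact for the worker thread.
messages = %w[m1 m2 m3]           # stands in for built Karafka messages
job_reference = messages           # what a Processing::Jobs::Consume would hold
groups = { 'events' => { 0 => messages } }

groups.clear                       # what MessagesBuffer#clear now does

job_reference #=> ["m1", "m2", "m3"] - still safe to process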
data/lib/karafka/connection/pauses_manager.rb
@@ -12,11 +12,11 @@ module Karafka
       end
     end

-    # Creates or fetches pause of a given topic partition.
+    # Creates or fetches pause tracker of a given topic partition.
     #
     # @param topic [String] topic name
     # @param partition [Integer] partition number
-    # @return [Karafka::TimeTrackers::Pause] pause instance
+    # @return [Karafka::TimeTrackers::Pause] pause tracker instance
     def fetch(topic, partition)
       @pauses[topic][partition] ||= TimeTrackers::Pause.new(
         timeout: Karafka::App.config.pause_timeout,
data/lib/karafka/connection/raw_messages_buffer.rb
@@ -0,0 +1,101 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Buffer for raw librdkafka messages.
+    #
+    # When message is added to this buffer, it gets assigned to an array with other messages from
+    # the same topic and partition.
+    #
+    # @note This buffer is NOT threadsafe.
+    #
+    # @note We store data here in groups per topic partition to handle the revocation case, where
+    #   we may need to remove messages from a single topic partition.
+    class RawMessagesBuffer
+      attr_reader :size
+
+      # @return [Karafka::Connection::RawMessagesBuffer] buffer instance
+      def initialize
+        @size = 0
+        @groups = Hash.new do |topic_groups, topic|
+          topic_groups[topic] = Hash.new do |partition_groups, partition|
+            partition_groups[partition] = []
+          end
+        end
+      end
+
+      # Adds a message to the buffer.
+      #
+      # @param message [Rdkafka::Consumer::Message] raw rdkafka message
+      # @return [Array<Rdkafka::Consumer::Message>] given partition topic sub-buffer array
+      def <<(message)
+        @size += 1
+        @groups[message.topic][message.partition] << message
+      end
+
+      # Allows iterating over messages from all the topics and partitions
+      #
+      # @yieldparam [String] topic name
+      # @yieldparam [Integer] partition number
+      # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
+      def each
+        @groups.each do |topic, partitions|
+          partitions.each do |partition, messages|
+            yield(topic, partition, messages)
+          end
+        end
+      end
+
+      # Removes given topic and partition data out of the buffer
+      # This is used when there's a partition revocation
+      # @param topic [String] topic we're interested in
+      # @param partition [Integer] partition of which data we want to remove
+      def delete(topic, partition)
+        return unless @groups.key?(topic)
+        return unless @groups.fetch(topic).key?(partition)
+
+        topic_data = @groups.fetch(topic)
+        topic_data.delete(partition)
+
+        recount!
+
+        # If there are no more partitions to handle in a given topic, remove it completely
+        @groups.delete(topic) if topic_data.empty?
+      end
+
+      # Removes duplicated messages from the same partitions
+      # This should be used only when a rebalance occurs, as we may again get data that we
+      # already have, due to resuming processing from the last offset. In cases like this we
+      # want to ensure as few duplications as possible
+      def uniq!
+        @groups.each_value do |partitions|
+          partitions.each_value do |messages|
+            messages.uniq!(&:offset)
+          end
+        end
+
+        recount!
+      end
+
+      # Removes all the data from the buffer.
+      #
+      # @note We do not clear the whole groups hash but rather we clear the partition hashes, so
+      #   we save ourselves some objects allocations. We cannot clear the underlying arrays as they
+      #   may be used in other threads for data processing, thus if we would clear it, we could
+      #   potentially clear a raw messages array for a job that is in the jobs queue.
+      def clear
+        @size = 0
+        @groups.each_value(&:clear)
+      end
+
+      private
+
+      # Updates the messages count if we performed any operations that could change the state
+      def recount!
+        @size = @groups.each_value.sum do |partitions|
+          partitions.each_value.map(&:count).sum
+        end
+      end
+    end
+  end
+end
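
A hedged walkthrough of the rebalance-oriented operations; `RawMessage` below is a hypothetical stand-in Struct, not the real `Rdkafka::Consumer::Message`, exposing only the attributes the buffer touches:

RawMessage = Struct.new(:topic, :partition, :offset)

buffer = Karafka::Connection::RawMessagesBuffer.new
buffer << RawMessage.new('events', 0, 100)
buffer << RawMessage.new('events', 0, 100) # re-polled duplicate after a rebalance
buffer << RawMessage.new('events', 1, 7)

buffer.uniq!               # de-duplicates each partition by offset
buffer.delete('events', 1) # partition 1 got revoked - drop its pending data
buffer.size #=> 1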
data/lib/karafka/connection/rebalance_manager.rb
@@ -9,35 +9,50 @@ module Karafka
     #
     # @note Since this does not happen really often, we try to stick with same objects for the
     #   empty states most of the time, so we don't create many objects during the manager life
+    #
+    # @note Internally in the rebalance manager we have a notion of lost partitions. Partitions
+    #   that are lost, are those that got revoked but did not get re-assigned back. We do not
+    #   expose this concept outside and we normalize to have them revoked, as it is irrelevant
+    #   from the rest of the code perspective as only those that are lost are truly revoked.
     class RebalanceManager
+      # Empty array for internal usage not to create new objects
+      EMPTY_ARRAY = [].freeze
+
+      private_constant :EMPTY_ARRAY
+
       # @return [RebalanceManager]
       def initialize
-        @assigned = {}
-        @revoked = {}
+        @assigned_partitions = {}
+        @revoked_partitions = {}
+        @lost_partitions = {}
       end

-      # @return [Hash<String, Array<Integer>>] hash where the keys are the names of topics for
-      #   which we've got new partitions assigned and array with ids of the partitions as the value
-      # @note Once assigned partitions are fetched, the state will be reset since the callbacks
-      #   for new assigned partitions are set only during a state change
-      def assigned_partitions
-        return @assigned if @assigned.empty?
-
-        result = @assigned.dup
-        @assigned.clear
-        result
+      # Resets the rebalance manager state
+      # This needs to be done before each polling loop as during the polling, the state may be
+      # changed
+      def clear
+        @assigned_partitions.clear
+        @revoked_partitions.clear
+        @lost_partitions.clear
       end

       # @return [Hash<String, Array<Integer>>] hash where the keys are the names of topics for
       #   which we've lost partitions and array with ids of the partitions as the value
-      # @note Once revoked partitions are fetched, the state will be reset since the callbacks
-      #   for new revoked partitions are set only during a state change
+      # @note Topics and partitions that got revoked and then re-assigned back are not
+      #   considered lost
       def revoked_partitions
-        return @revoked if @revoked.empty?
+        return @revoked_partitions if @revoked_partitions.empty?
+        return @lost_partitions unless @lost_partitions.empty?
+
+        @revoked_partitions.each do |topic, partitions|
+          @lost_partitions[topic] = partitions - @assigned_partitions.fetch(topic, EMPTY_ARRAY)
+        end
+
+        @lost_partitions
+      end

-        result = @revoked.dup
-        @revoked.clear
-        result
+      # @return [Boolean] true if any partitions were revoked
+      def revoked_partitions?
+        !revoked_partitions.empty?
       end

       # Callback that kicks in inside of rdkafka, when new partitions are assigned.
@@ -46,7 +61,7 @@
       # @param _ [Rdkafka::Consumer]
       # @param partitions [Rdkafka::Consumer::TopicPartitionList]
       def on_partitions_assigned(_, partitions)
-        @assigned = partitions.to_h.transform_values { |part| part.map(&:partition) }
+        @assigned_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
       end

       # Callback that kicks in inside of rdkafka, when partitions are revoked.
@@ -55,7 +70,7 @@
       # @param _ [Rdkafka::Consumer]
       # @param partitions [Rdkafka::Consumer::TopicPartitionList]
       def on_partitions_revoked(_, partitions)
-        @revoked = partitions.to_h.transform_values { |part| part.map(&:partition) }
+        @revoked_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
       end
     end
   end
 end
data/lib/karafka/contracts/config.rb
@@ -25,6 +25,7 @@
       required(:pause_max_timeout) { int? & gt?(0) }
       required(:pause_with_exponential_backoff).filled(:bool?)
       required(:shutdown_timeout) { int? & gt?(0) }
+      required(:max_wait_time) { int? & gt?(0) }
       required(:kafka).filled(:hash)

       # We validate internals just to be sure, that they are present and working
@@ -32,6 +33,7 @@ module Karafka
         required(:routing_builder)
         required(:status)
         required(:process)
+        required(:scheduler)
         required(:subscription_groups_builder)
       end
     end
@@ -52,6 +54,12 @@
         key(:pause_timeout).failure(:max_timeout_vs_pause_max_timeout)
       end
     end
+
+    rule(:shutdown_timeout, :max_wait_time) do
+      if values[:max_wait_time].to_i >= values[:shutdown_timeout].to_i
+        key(:shutdown_timeout).failure(:shutdown_timeout_vs_max_wait_time)
+      end
+    end
   end
 end
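
The new cross-field rule keeps `max_wait_time` strictly below `shutdown_timeout`, plausibly so that a single blocking `#batch_poll` cannot consume the entire shutdown window (this rationale is inferred, not stated in the diff). A sketch of a setup that passes the contract, with illustrative values and the standard Karafka::App setup DSL assumed:

class KarafkaApp < Karafka::App
  setup do |config|
    # Both values are in milliseconds; the contract only requires
    # max_wait_time < shutdown_timeout.
    config.max_wait_time = 1_000
    config.shutdown_timeout = 60_000
  end
end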
data/lib/karafka/helpers/async.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Helpers
+    # Allows a given class to run async in a separate thread. It also provides a few methods we
+    # may want to use to control the underlying thread
+    #
+    # @note Thread running code needs to manage its own exceptions. If they leak out, they will
+    #   abort thread on exception.
+    module Async
+      class << self
+        # Adds forwardable to redirect thread-based control methods to the underlying thread that
+        # runs the async operations
+        #
+        # @param base [Class] class we're including this module in
+        def included(base)
+          base.extend ::Forwardable
+
+          base.def_delegators :@thread, :join, :terminate, :alive?
+        end
+      end
+
+      # Runs the `#call` method in a new thread
+      def async_call
+        @thread = Thread.new do
+          Thread.current.abort_on_exception = true
+
+          call
+        end
+      end
+    end
+  end
+end
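
A hypothetical class using the mixin: `#call` holds the work, `#async_call` backgrounds it, and `join`/`terminate`/`alive?` proxy to the underlying thread:

class Ticker
  include Karafka::Helpers::Async

  def call
    3.times do
      puts "tick #{Time.now}"
      sleep(1)
    end
  end
end

ticker = Ticker.new
ticker.async_call
ticker.alive? #=> true (while still ticking)
ticker.join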
data/lib/karafka/instrumentation/monitor.rb
@@ -22,7 +22,8 @@ module Karafka
       app.stopping
       app.stopped

-      consumer.consume
+      consumer.prepared
+      consumer.consumed
       consumer.revoked
       consumer.shutdown

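The single `consumer.consume` notification is split into `consumer.prepared` and `consumer.consumed`. A hedged subscription example; the `:caller` payload key is an assumption based on the `caller: self` convention visible elsewhere in this diff:

Karafka.monitor.subscribe('consumer.consumed') do |event|
  puts "Batch consumed by #{event[:caller].class}"
end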
 
data/lib/karafka/messages/batch_metadata.rb
@@ -13,10 +13,33 @@ module Karafka
       :deserializer,
       :partition,
       :topic,
+      :created_at,
       :scheduled_at,
-      :consumption_lag,
-      :processing_lag,
+      :processed_at,
       keyword_init: true
-    )
+    ) do
+      # This lag describes how long it took for a message to be consumed from the moment it was
+      # created
+      def consumption_lag
+        time_distance_in_ms(processed_at, created_at)
+      end
+
+      # This lag describes how long a batch had to wait before it was picked up by one of the
+      # workers
+      def processing_lag
+        time_distance_in_ms(processed_at, scheduled_at)
+      end
+
+      private
+
+      # Computes time distance in between two times in ms
+      #
+      # @param time1 [Time]
+      # @param time2 [Time]
+      # @return [Integer] distance in between two times in ms
+      def time_distance_in_ms(time1, time2)
+        ((time1 - time2) * 1_000).round
+      end
+    end
   end
 end
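
The precomputed lag fields give way to three timestamps from which both lags are derived on demand. A worked example of the arithmetic, with illustrative times:

created_at   = Time.at(100.0) # message produced to Kafka
scheduled_at = Time.at(100.4) # consume job enqueued after polling
processed_at = Time.at(100.5) # worker picked the batch up

((processed_at - created_at) * 1_000).round   #=> 500 (consumption_lag, ms)
((processed_at - scheduled_at) * 1_000).round #=> 100 (processing_lag, ms)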