karafka 2.0.0.alpha5 → 2.0.0.beta2
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +35 -2
- data/Gemfile.lock +6 -6
- data/bin/integrations +55 -43
- data/config/errors.yml +1 -0
- data/docker-compose.yml +4 -1
- data/lib/active_job/karafka.rb +2 -2
- data/lib/karafka/active_job/routing/extensions.rb +21 -0
- data/lib/karafka/base_consumer.rb +65 -12
- data/lib/karafka/connection/client.rb +36 -6
- data/lib/karafka/connection/listener.rb +92 -27
- data/lib/karafka/connection/listeners_batch.rb +24 -0
- data/lib/karafka/connection/messages_buffer.rb +49 -22
- data/lib/karafka/connection/pauses_manager.rb +2 -2
- data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
- data/lib/karafka/connection/rebalance_manager.rb +35 -20
- data/lib/karafka/contracts/config.rb +8 -0
- data/lib/karafka/helpers/async.rb +33 -0
- data/lib/karafka/instrumentation/monitor.rb +2 -1
- data/lib/karafka/messages/batch_metadata.rb +26 -3
- data/lib/karafka/messages/builders/batch_metadata.rb +17 -29
- data/lib/karafka/messages/builders/message.rb +1 -0
- data/lib/karafka/messages/builders/messages.rb +4 -12
- data/lib/karafka/pro/active_job/consumer.rb +21 -0
- data/lib/karafka/pro/active_job/dispatcher.rb +10 -10
- data/lib/karafka/pro/active_job/job_options_contract.rb +9 -9
- data/lib/karafka/pro/loader.rb +17 -8
- data/lib/karafka/pro/performance_tracker.rb +80 -0
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +38 -0
- data/lib/karafka/pro/scheduler.rb +54 -0
- data/lib/karafka/processing/executor.rb +19 -11
- data/lib/karafka/processing/executors_buffer.rb +15 -7
- data/lib/karafka/processing/jobs/base.rb +28 -0
- data/lib/karafka/processing/jobs/consume.rb +11 -4
- data/lib/karafka/processing/jobs_queue.rb +28 -16
- data/lib/karafka/processing/worker.rb +30 -9
- data/lib/karafka/processing/workers_batch.rb +5 -0
- data/lib/karafka/railtie.rb +12 -0
- data/lib/karafka/routing/consumer_group.rb +1 -1
- data/lib/karafka/routing/subscription_group.rb +1 -1
- data/lib/karafka/routing/subscription_groups_builder.rb +3 -2
- data/lib/karafka/routing/topics.rb +38 -0
- data/lib/karafka/runner.rb +19 -27
- data/lib/karafka/scheduler.rb +20 -0
- data/lib/karafka/server.rb +24 -23
- data/lib/karafka/setup/config.rb +4 -1
- data/lib/karafka/time_trackers/pause.rb +10 -2
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +13 -4
- metadata.gz.sig +0 -0
- data/lib/karafka/active_job/routing_extensions.rb +0 -18
Selected diff hunks, grouped by file:

data/lib/karafka/connection/listener.rb

@@ -3,9 +3,13 @@
 module Karafka
   module Connection
     # A single listener that listens to incoming messages from a single subscription group.
-    # It polls the messages and then enqueues. It also takes care of potential recovery from
+    # It polls the messages and then enqueues jobs. It also takes care of potential recovery from
     # critical errors by restarting everything in a safe manner.
+    #
+    # This is the heart of the consumption process.
     class Listener
+      include Helpers::Async
+
       # @param subscription_group [Karafka::Routing::SubscriptionGroup]
       # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
       # @return [Karafka::Connection::Listener] listener instance
@@ -15,6 +19,12 @@ module Karafka
         @pauses_manager = PausesManager.new
         @client = Client.new(@subscription_group)
         @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
+        # We reference scheduler here as it is much faster than fetching this each time
+        @scheduler = ::Karafka::App.config.internal.scheduler
+        # We keep one buffer for messages to preserve memory and not allocate extra objects
+        # We can do this that way because we always first schedule jobs using messages before we
+        # fetch another batch.
+        @messages_buffer = MessagesBuffer.new(subscription_group)
       end

       # Runs the main listener fetch loop.
@@ -51,33 +61,55 @@ module Karafka
           )

           resume_paused_partitions
+
           # We need to fetch data before we revoke lost partitions details as during the polling
           # the callbacks for tracking lost partitions are triggered. Otherwise we would be always
           # one batch behind.
-
+          poll_and_remap_messages

           Karafka.monitor.instrument(
             'connection.listener.fetch_loop.received',
             caller: self,
-            messages_buffer: messages_buffer
+            messages_buffer: @messages_buffer
           )

           # If there were revoked partitions, we need to wait on their jobs to finish before
           # distributing consuming jobs as upon revoking, we might get assigned to the same
           # partitions, thus getting their jobs. The revoking jobs need to finish before
           # appropriate consumers are taken down and re-created
-
-
-          distribute_partitions_jobs(messages_buffer)
+          build_and_schedule_revoke_lost_partitions_jobs

           # We wait only on jobs from our subscription group. Other groups are independent.
-          wait
+          wait
+
+          build_and_schedule_consumption_jobs
+
+          wait

           # We don't use the `#commit_offsets!` here for performance reasons. This can be achieved
           # if needed by using manual offset management.
           @client.commit_offsets
         end

+        # If we are stopping we will no longer schedule any jobs despite polling.
+        # We need to keep polling not to exceed the `max.poll.interval` for long-running
+        # non-blocking jobs and we need to allow them to finish. We however do not want to
+        # enqueue any new jobs. It's worth keeping in mind that it is the end user responsibility
+        # to detect shutdown in their long-running logic or else Karafka will force shutdown
+        # after a while.
+        #
+        # We do not care about resuming any partitions or lost jobs as we do not plan to do
+        # anything with them as we're in the shutdown phase.
+        wait_with_poll
+
+        # We do not want to schedule the shutdown jobs prior to finishing all the jobs
+        # (including non-blocking) as there might be a long-running job with a shutdown and then
+        # we would run two jobs in parallel for the same executor and consumer. We do not want that
+        # as it could create a race-condition.
+        build_and_schedule_shutdown_jobs
+
+        wait_with_poll
+
        shutdown

      # This is on purpose - see the notes for this method
@@ -98,55 +130,88 @@ module Karafka

       # Resumes processing of partitions that were paused due to an error.
       def resume_paused_partitions
-        @pauses_manager.resume
+        @pauses_manager.resume do |topic, partition|
+          @client.resume(topic, partition)
+        end
       end

       # Enqueues revoking jobs for partitions that were taken away from the running process.
-
-      def distribute_revoke_lost_partitions_jobs
+      def build_and_schedule_revoke_lost_partitions_jobs
         revoked_partitions = @client.rebalance_manager.revoked_partitions

-
+        # Stop early to save on some execution and array allocation
+        return if revoked_partitions.empty?
+
+        jobs = []

         revoked_partitions.each do |topic, partitions|
           partitions.each do |partition|
-
-            executor = @executors.fetch(topic, partition,
-
+            pause_tracker = @pauses_manager.fetch(topic, partition)
+            executor = @executors.fetch(topic, partition, pause_tracker)
+            jobs << Processing::Jobs::Revoked.new(executor)
           end
         end

-
+        @scheduler.schedule_revocation(@jobs_queue, jobs)
       end

-      #
+      # Enqueues the shutdown jobs for all the executors that exist in our subscription group
+      def build_and_schedule_shutdown_jobs
+        jobs = []
+
+        @executors.each do |_, _, executor|
+          jobs << Processing::Jobs::Shutdown.new(executor)
+        end
+
+        @scheduler.schedule_shutdown(@jobs_queue, jobs)
+      end
+
+      # Polls messages within the time and amount boundaries defined in the settings and then
+      # builds karafka messages based on the raw rdkafka messages buffer returned by the
+      # `#batch_poll` method.
       #
-      # @
-      def
-        messages_buffer.
+      # @note There are two buffers, one for raw messages and one for "built" karafka messages
+      def poll_and_remap_messages
+        @messages_buffer.remap(
+          @client.batch_poll
+        )
+      end
+
+      # Takes the messages per topic partition and enqueues processing jobs in threads using
+      # given scheduler.
+      def build_and_schedule_consumption_jobs
+        return if @messages_buffer.empty?
+
+        jobs = []
+
+        @messages_buffer.each do |topic, partition, messages|
          pause = @pauses_manager.fetch(topic, partition)

          next if pause.paused?

          executor = @executors.fetch(topic, partition, pause)

-
+          jobs << Processing::Jobs::Consume.new(executor, messages)
         end
+
+        @scheduler.schedule_consumption(@jobs_queue, jobs)
       end

       # Waits for all the jobs from a given subscription group to finish before moving forward
-
-
-
+      def wait
+        @jobs_queue.wait(@subscription_group.id)
+      end
+
+      # Waits without blocking the polling
+      # This should be used only when we no longer plan to use any incoming data and we can safely
+      # discard it
+      def wait_with_poll
+        @client.batch_poll until @jobs_queue.empty?(@subscription_group.id)
       end

       # Stops the jobs queue, triggers shutdown on all the executors (sync), commits offsets and
       # stops kafka client.
       def shutdown
-        @jobs_queue.close
-        # This runs synchronously, making sure we finish all the shutdowns before we stop the
-        # client.
-        @executors.shutdown
         @client.commit_offsets!
         @client.stop
       end
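For orientation, the reworked fetch loop now runs a fixed sequence per iteration: resume paused partitions, poll and remap messages, schedule revocation jobs, wait, schedule consumption jobs, wait again, then commit offsets. The sketch below is a minimal, self-contained illustration of that ordering only; SketchJobsQueue and SketchListener are hypothetical stand-ins, not Karafka classes.

# Hypothetical stand-ins used purely to illustrate the ordering of the new fetch loop.
class SketchJobsQueue
  def initialize
    @pending = []
  end

  def push(job)
    @pending << job
  end

  # Pretends to block until all previously scheduled jobs are done.
  def wait
    @pending.each { |job| puts "finished #{job} job" }
    @pending.clear
  end
end

class SketchListener
  def initialize(jobs_queue)
    @jobs_queue = jobs_queue
  end

  # Mirrors one iteration of the rewritten fetch loop.
  def run_single_iteration
    puts 'resume paused partitions'
    puts 'poll and remap messages'

    # Revocation jobs must finish before consumption jobs are scheduled,
    # hence the extra wait in between.
    @jobs_queue.push(:revocation)
    @jobs_queue.wait

    @jobs_queue.push(:consumption)
    @jobs_queue.wait

    puts 'commit offsets'
  end
end

SketchListener.new(SketchJobsQueue.new).run_single_iteration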
data/lib/karafka/connection/listeners_batch.rb

@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Abstraction layer around listeners batch.
+    class ListenersBatch
+      include Enumerable
+
+      # @param jobs_queue [JobsQueue]
+      # @return [ListenersBatch]
+      def initialize(jobs_queue)
+        @batch = App.subscription_groups.map do |subscription_group|
+          Connection::Listener.new(subscription_group, jobs_queue)
+        end
+      end
+
+      # Iterates over available listeners and yields each listener
+      # @param block [Proc] block we want to run
+      def each(&block)
+        @batch.each(&block)
+      end
+    end
+  end
+end
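The batch is Enumerable, so the runner can build one listener per subscription group and fan them out to threads. A rough usage sketch, under the assumptions (not verified here) that a Karafka application is already booted and that JobsQueue.new takes no arguments at this version:

# Sketch only: assumes a booted Karafka app with routing defined.
jobs_queue = Karafka::Processing::JobsQueue.new
listeners  = Karafka::Connection::ListenersBatch.new(jobs_queue)

# Each listener runs its fetch loop in its own thread via Helpers::Async (see below);
# #join and #alive? are delegated to that thread.
listeners.each(&:async_call)
listeners.each(&:join)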
data/lib/karafka/connection/messages_buffer.rb

@@ -2,16 +2,26 @@

 module Karafka
   module Connection
-    # Buffer
-    # When message is added to this buffer, it gets assigned to an array with other messages from
-    # the same topic and partition.
+    # Buffer used to build and store karafka messages built based on raw librdkafka messages.
     #
-    #
+    # Why do we have two buffers? `RawMessagesBuffer` is used to store raw messages and to handle
+    # cases related to partition revocation and reconnections. It is "internal" to the listening
+    # process. `MessagesBuffer` on the other hand is used to "translate" those raw messages that
+    # we know that are ok into Karafka messages and to simplify further work with them.
+    #
+    # While it adds a bit of overhead, it makes conceptual things much easier and it adds only two
+    # simple hash iterations over messages batch.
+    #
+    # @note This buffer is NOT thread safe. We do not worry about it as we do not use it outside
+    #   of the main listener loop. It can be cleared after the jobs are scheduled with messages
+    #   it stores, because messages arrays are not "cleared" in any way directly and their
+    #   reference stays.
     class MessagesBuffer
       attr_reader :size

-      # @
-      def initialize
+      # @param subscription_group [Karafka::Routing::SubscriptionGroup]
+      def initialize(subscription_group)
+        @subscription_group = subscription_group
         @size = 0
         @groups = Hash.new do |topic_groups, topic|
           topic_groups[topic] = Hash.new do |partition_groups, partition|
@@ -20,11 +30,35 @@ module Karafka
         end
       end

-      #
+      # Remaps raw messages from the raw messages buffer to Karafka messages
+      # @param raw_messages_buffer [RawMessagesBuffer] buffer with raw messages
+      def remap(raw_messages_buffer)
+        clear unless @size.zero?
+
+        # Since it happens "right after" we've received the messages, it is close enough it time
+        # to be used as the moment we received messages.
+        received_at = Time.now
+
+        raw_messages_buffer.each do |topic, partition, messages|
+          @size += messages.count
+
+          ktopic = @subscription_group.topics.find(topic)
+
+          @groups[topic][partition] = messages.map do |message|
+            Messages::Builders::Message.call(
+              message,
+              ktopic,
+              received_at
+            )
+          end
+        end
+      end
+
+      # Allows to iterate over all the topics and partitions messages
       #
       # @yieldparam [String] topic name
       # @yieldparam [Integer] partition number
-      # @yieldparam [Array<
+      # @yieldparam [Array<Karafka::Messages::Message>] messages from a given topic partition
       def each
         @groups.each do |topic, partitions|
           partitions.each do |partition, messages|
@@ -33,24 +67,17 @@ module Karafka
         end
       end

-      #
-
-
-      # @return [Array<Rdkafka::Consumer::Message>] given partition topic sub-buffer array
-      def <<(message)
-        @size += 1
-        @groups[message.topic][message.partition] << message
+      # @return [Boolean] is the buffer empty or does it contain any messages
+      def empty?
+        @size.zero?
       end

-
-
-      #
-      # we save ourselves some objects allocations. We cannot clear the underlying arrays as they
-      # may be used in other threads for data processing, thus if we would clear it, we could
-      # potentially clear a raw messages array for a job that is in the jobs queue.
+      private
+
+      # Clears the buffer completely
       def clear
         @size = 0
-        @groups.
+        @groups.clear
       end
     end
   end
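The two-buffer split means `#remap` simply walks the raw topic/partition groups and rebuilds each message once, stamping them all with a shared received_at time. A self-contained sketch of that idea (RawMessage and BuiltMessage are hypothetical stand-ins, not Karafka classes):

# Conceptual sketch of the raw -> built remapping step.
RawMessage   = Struct.new(:topic, :partition, :offset, :payload)
BuiltMessage = Struct.new(:raw, :received_at)

raw_groups = {
  'events' => { 0 => [RawMessage.new('events', 0, 10, '{}')] }
}

# One timestamp for the whole batch, taken right after polling.
received_at = Time.now

built_groups = raw_groups.transform_values do |partitions|
  partitions.transform_values do |messages|
    messages.map { |message| BuiltMessage.new(message, received_at) }
  end
end

built_groups.each do |topic, partitions|
  partitions.each do |partition, messages|
    puts "#{topic}/#{partition}: #{messages.size} built message(s)"
  end
end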
data/lib/karafka/connection/pauses_manager.rb

@@ -12,11 +12,11 @@ module Karafka
       end
     end

-    # Creates or fetches pause of a given topic partition.
+    # Creates or fetches pause tracker of a given topic partition.
     #
     # @param topic [String] topic name
     # @param partition [Integer] partition number
-    # @return [Karafka::TimeTrackers::Pause] pause instance
+    # @return [Karafka::TimeTrackers::Pause] pause tracker instance
     def fetch(topic, partition)
       @pauses[topic][partition] ||= TimeTrackers::Pause.new(
         timeout: Karafka::App.config.pause_timeout,
data/lib/karafka/connection/raw_messages_buffer.rb

@@ -0,0 +1,101 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Buffer for raw librdkafka messages.
+    #
+    # When message is added to this buffer, it gets assigned to an array with other messages from
+    # the same topic and partition.
+    #
+    # @note This buffer is NOT threadsafe.
+    #
+    # @note We store data here in groups per topic partition to handle the revocation case, where
+    #   we may need to remove messages from a single topic partition.
+    class RawMessagesBuffer
+      attr_reader :size
+
+      # @return [Karafka::Connection::MessagesBuffer] buffer instance
+      def initialize
+        @size = 0
+        @groups = Hash.new do |topic_groups, topic|
+          topic_groups[topic] = Hash.new do |partition_groups, partition|
+            partition_groups[partition] = []
+          end
+        end
+      end
+
+      # Adds a message to the buffer.
+      #
+      # @param message [Rdkafka::Consumer::Message] raw rdkafka message
+      # @return [Array<Rdkafka::Consumer::Message>] given partition topic sub-buffer array
+      def <<(message)
+        @size += 1
+        @groups[message.topic][message.partition] << message
+      end
+
+      # Allows to iterate over all the topics and partitions messages
+      #
+      # @yieldparam [String] topic name
+      # @yieldparam [Integer] partition number
+      # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
+      def each
+        @groups.each do |topic, partitions|
+          partitions.each do |partition, messages|
+            yield(topic, partition, messages)
+          end
+        end
+      end
+
+      # Removes given topic and partition data out of the buffer
+      # This is used when there's a partition revocation
+      # @param topic [String] topic we're interested in
+      # @param partition [Integer] partition of which data we want to remove
+      def delete(topic, partition)
+        return unless @groups.key?(topic)
+        return unless @groups.fetch(topic).key?(partition)
+
+        topic_data = @groups.fetch(topic)
+        topic_data.delete(partition)
+
+        recount!
+
+        # If there are no more partitions to handle in a given topic, remove it completely
+        @groups.delete(topic) if topic_data.empty?
+      end
+
+      # Removes duplicated messages from the same partitions
+      # This should be used only when rebalance occurs, as we may get data again we already have
+      # due to the processing from the last offset. In cases like this, we may get same data
+      # again and we do want to ensure as few duplications as possible
+      def uniq!
+        @groups.each_value do |partitions|
+          partitions.each_value do |messages|
+            messages.uniq!(&:offset)
+          end
+        end
+
+        recount!
+      end
+
+      # Removes all the data from the buffer.
+      #
+      # @note We do not clear the whole groups hash but rather we clear the partition hashes, so
+      #   we save ourselves some objects allocations. We cannot clear the underlying arrays as they
+      #   may be used in other threads for data processing, thus if we would clear it, we could
+      #   potentially clear a raw messages array for a job that is in the jobs queue.
+      def clear
+        @size = 0
+        @groups.each_value(&:clear)
+      end
+
+      private
+
+      # Updates the messages count if we performed any operations that could change the state
+      def recount!
+        @size = @groups.each_value.sum do |partitions|
+          partitions.each_value.map(&:count).sum
+        end
+      end
+    end
+  end
+end
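Since this class is entirely new, a short usage sketch may help. It assumes the RawMessagesBuffer defined above is loaded and uses a Struct as a hypothetical stand-in for Rdkafka::Consumer::Message (only topic, partition and offset matter here):

FakeMessage = Struct.new(:topic, :partition, :offset)

buffer = Karafka::Connection::RawMessagesBuffer.new

buffer << FakeMessage.new('events', 0, 1)
buffer << FakeMessage.new('events', 0, 1) # duplicated offset, e.g. after a rebalance
buffer << FakeMessage.new('events', 1, 5)

buffer.uniq!               # drops the duplicated offset, size goes from 3 to 2
buffer.delete('events', 1) # simulates a revoked partition, size goes to 1

buffer.each do |topic, partition, messages|
  puts "#{topic}/#{partition}: offsets #{messages.map(&:offset)}"
end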
data/lib/karafka/connection/rebalance_manager.rb

@@ -9,35 +9,50 @@ module Karafka
     #
     # @note Since this does not happen really often, we try to stick with same objects for the
     #   empty states most of the time, so we don't create many objects during the manager life
+    #
+    # @note Internally in the rebalance manager we have a notion of lost partitions. Partitions
+    #   that are lost, are those that got revoked but did not get re-assigned back. We do not
+    #   expose this concept outside and we normalize to have them revoked, as it is irrelevant
+    #   from the rest of the code perspective as only those that are lost are truly revoked.
     class RebalanceManager
+      # Empty array for internal usage not to create new objects
+      EMPTY_ARRAY = [].freeze
+
+      private_constant :EMPTY_ARRAY
+
       # @return [RebalanceManager]
       def initialize
-        @
-        @
+        @assigned_partitions = {}
+        @revoked_partitions = {}
+        @lost_partitions = {}
       end

-      #
-      #
-      #
-
-
-
-
-        result = @assigned.dup
-        @assigned.clear
-        result
+      # Resets the rebalance manager state
+      # This needs to be done before each polling loop as during the polling, the state may be
+      # changed
+      def clear
+        @assigned_partitions.clear
+        @revoked_partitions.clear
+        @lost_partitions.clear
       end

       # @return [Hash<String, Array<Integer>>] hash where the keys are the names of topics for
       #   which we've lost partitions and array with ids of the partitions as the value
-      # @note
-      #   for new revoked partitions are set only during a state change
+      # @note We do not consider as lost topics and partitions that got revoked and assigned
       def revoked_partitions
-        return @
+        return @revoked_partitions if @revoked_partitions.empty?
+        return @lost_partitions unless @lost_partitions.empty?
+
+        @revoked_partitions.each do |topic, partitions|
+          @lost_partitions[topic] = partitions - @assigned_partitions.fetch(topic, EMPTY_ARRAY)
+        end
+
+        @lost_partitions
+      end

-
-
-
+      # @return [Boolean] true if any partitions were revoked
+      def revoked_partitions?
+        !revoked_partitions.empty?
       end

       # Callback that kicks in inside of rdkafka, when new partitions are assigned.
@@ -46,7 +61,7 @@ module Karafka
       # @param _ [Rdkafka::Consumer]
       # @param partitions [Rdkafka::Consumer::TopicPartitionList]
       def on_partitions_assigned(_, partitions)
-        @
+        @assigned_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
       end

       # Callback that kicks in inside of rdkafka, when partitions are revoked.
@@ -55,7 +70,7 @@ module Karafka
       # @param _ [Rdkafka::Consumer]
       # @param partitions [Rdkafka::Consumer::TopicPartitionList]
       def on_partitions_revoked(_, partitions)
-        @
+        @revoked_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
       end
     end
   end
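The lost-partition logic boils down to simple hash arithmetic: whatever was revoked and not re-assigned back in the same rebalance is lost. A standalone illustration of that computation, with sample data only:

revoked_partitions  = { 'events' => [0, 1, 2] }
assigned_partitions = { 'events' => [2] }

lost_partitions = {}

revoked_partitions.each do |topic, partitions|
  lost_partitions[topic] = partitions - assigned_partitions.fetch(topic, [])
end

puts lost_partitions.inspect # => {"events"=>[0, 1]}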
data/lib/karafka/contracts/config.rb

@@ -25,6 +25,7 @@ module Karafka
       required(:pause_max_timeout) { int? & gt?(0) }
       required(:pause_with_exponential_backoff).filled(:bool?)
       required(:shutdown_timeout) { int? & gt?(0) }
+      required(:max_wait_time) { int? & gt?(0) }
       required(:kafka).filled(:hash)

       # We validate internals just to be sure, that they are present and working
@@ -32,6 +33,7 @@ module Karafka
         required(:routing_builder)
         required(:status)
         required(:process)
+        required(:scheduler)
         required(:subscription_groups_builder)
       end
     end
@@ -52,6 +54,12 @@ module Karafka
           key(:pause_timeout).failure(:max_timeout_vs_pause_max_timeout)
         end
       end
+
+      rule(:shutdown_timeout, :max_wait_time) do
+        if values[:max_wait_time].to_i >= values[:shutdown_timeout].to_i
+          key(:shutdown_timeout).failure(:shutdown_timeout_vs_max_wait_time)
+        end
+      end
     end
   end
 end
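The new cross-field rule rejects configurations where max_wait_time is not strictly lower than shutdown_timeout, so a poll can always complete within the shutdown window. A configuration sketch that would satisfy it, assuming the standard Karafka::App setup DSL and with illustrative values only:

class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka = { 'bootstrap.servers': '127.0.0.1:9092' }
    config.max_wait_time = 1_000      # poll for at most 1 second
    config.shutdown_timeout = 60_000  # must stay greater than max_wait_time
  end
end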
data/lib/karafka/helpers/async.rb

@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Helpers
+    # Allows a given class to run async in a separate thread. Provides also few methods we may
+    # want to use to control the underlying thread
+    #
+    # @note Thread running code needs to manage it's own exceptions. If they leak out, they will
+    #   abort thread on exception.
+    module Async
+      class << self
+        # Adds forwardable to redirect thread-based control methods to the underlying thread that
+        # runs the async operations
+        #
+        # @param base [Class] class we're including this module in
+        def included(base)
+          base.extend ::Forwardable
+
+          base.def_delegators :@thread, :join, :terminate, :alive?
+        end
+      end
+
+      # Runs the `#call` method in a new thread
+      def async_call
+        @thread = Thread.new do
+          Thread.current.abort_on_exception = true
+
+          call
+        end
+      end
+    end
+  end
+end
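Using the helper is a matter of defining #call and including the module; #join, #terminate and #alive? are then delegated to the underlying thread. A small sketch, assuming Karafka (and thus this helper) is loaded; SleepyWorker is a hypothetical class, not part of the gem:

class SleepyWorker
  include Karafka::Helpers::Async

  # Runs in the background thread started by #async_call.
  def call
    sleep(0.1)
    puts 'done'
  end
end

worker = SleepyWorker.new
worker.async_call
puts worker.alive? # => true while the thread is still sleeping
worker.join        # blocks until #call finishes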
data/lib/karafka/messages/batch_metadata.rb

@@ -13,10 +13,33 @@ module Karafka
       :deserializer,
       :partition,
       :topic,
+      :created_at,
       :scheduled_at,
-      :
-      :processing_lag,
+      :processed_at,
       keyword_init: true
-    )
+    ) do
+      # This lag describes how long did it take for a message to be consumed from the moment it was
+      # created
+      def consumption_lag
+        time_distance_in_ms(processed_at, created_at)
+      end
+
+      # This lag describes how long did a batch have to wait before it was picked up by one of the
+      # workers
+      def processing_lag
+        time_distance_in_ms(processed_at, scheduled_at)
+      end
+
+      private
+
+      # Computes time distance in between two times in ms
+      #
+      # @param time1 [Time]
+      # @param time2 [Time]
+      # @return [Integer] distance in between two times in ms
+      def time_distance_in_ms(time1, time2)
+        ((time1 - time2) * 1_000).round
+      end
+    end
   end
 end
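Both lag readings are plain millisecond distances between the struct's timestamps. A standalone illustration with made-up times:

created_at   = Time.now - 2.5 # newest message produced 2.5s ago
scheduled_at = Time.now - 0.3 # batch job enqueued 0.3s ago
processed_at = Time.now       # worker picked the batch up now

consumption_lag = ((processed_at - created_at) * 1_000).round
processing_lag  = ((processed_at - scheduled_at) * 1_000).round

puts "consumption lag: #{consumption_lag}ms" # roughly 2500
puts "processing lag:  #{processing_lag}ms"  # roughly 300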