karafka 2.0.0.beta3 → 2.0.0.rc1
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +18 -15
- data/CHANGELOG.md +37 -0
- data/CONTRIBUTING.md +0 -5
- data/Gemfile.lock +6 -6
- data/README.md +2 -10
- data/bin/benchmarks +2 -2
- data/bin/integrations +10 -3
- data/bin/{stress → stress_many} +1 -1
- data/bin/stress_one +13 -0
- data/bin/wait_for_kafka +20 -0
- data/docker-compose.yml +32 -13
- data/karafka.gemspec +1 -1
- data/lib/karafka/active_job/routing/extensions.rb +1 -1
- data/lib/karafka/app.rb +2 -1
- data/lib/karafka/base_consumer.rb +59 -46
- data/lib/karafka/connection/client.rb +60 -14
- data/lib/karafka/connection/listener.rb +37 -11
- data/lib/karafka/connection/rebalance_manager.rb +20 -19
- data/lib/karafka/contracts/config.rb +18 -4
- data/lib/karafka/contracts/server_cli_options.rb +1 -1
- data/lib/karafka/errors.rb +3 -0
- data/lib/karafka/instrumentation/logger_listener.rb +0 -3
- data/lib/karafka/instrumentation/monitor.rb +0 -1
- data/lib/karafka/pro/active_job/consumer.rb +2 -8
- data/lib/karafka/pro/base_consumer.rb +82 -0
- data/lib/karafka/pro/loader.rb +14 -8
- data/lib/karafka/pro/processing/coordinator.rb +63 -0
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +1 -1
- data/lib/karafka/pro/processing/jobs_builder.rb +3 -2
- data/lib/karafka/pro/processing/partitioner.rb +41 -0
- data/lib/karafka/pro/processing/scheduler.rb +56 -0
- data/lib/karafka/pro/routing/extensions.rb +6 -0
- data/lib/karafka/processing/coordinator.rb +88 -0
- data/lib/karafka/processing/coordinators_buffer.rb +54 -0
- data/lib/karafka/processing/executor.rb +16 -9
- data/lib/karafka/processing/executors_buffer.rb +46 -15
- data/lib/karafka/processing/jobs/base.rb +8 -3
- data/lib/karafka/processing/jobs/consume.rb +11 -4
- data/lib/karafka/processing/jobs_builder.rb +3 -2
- data/lib/karafka/processing/partitioner.rb +22 -0
- data/lib/karafka/processing/result.rb +29 -0
- data/lib/karafka/processing/scheduler.rb +22 -0
- data/lib/karafka/processing/worker.rb +2 -2
- data/lib/karafka/routing/consumer_group.rb +1 -1
- data/lib/karafka/routing/topic.rb +14 -0
- data/lib/karafka/setup/config.rb +20 -10
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +16 -8
- metadata.gz.sig +0 -0
- data/lib/karafka/pro/base_consumer_extensions.rb +0 -66
- data/lib/karafka/pro/scheduler.rb +0 -54
- data/lib/karafka/scheduler.rb +0 -20
data/lib/karafka/connection/client.rb CHANGED

@@ -36,6 +36,12 @@ module Karafka
         # Marks if we need to offset. If we did not store offsets, we should not commit the offset
         # position as it will crash rdkafka
         @offsetting = false
+        # We need to keep track of what we have paused for resuming
+        # In case we loose partition, we still need to resume it, otherwise it won't be fetched
+        # again if we get reassigned to it later on. We need to keep them as after revocation we
+        # no longer may be able to fetch them from Kafka. We could build them but it is easier
+        # to just keep them here and use if needed when cannot be obtained
+        @paused_tpls = Hash.new { |h, k| h[k] = {} }
       end

       # Fetches messages within boundaries defined by the settings (time, size, topics, etc).
@@ -45,12 +51,13 @@ module Karafka
       # @note This method should not be executed from many threads at the same time
       def batch_poll
         time_poll = TimeTrackers::Poll.new(@subscription_group.max_wait_time)
-        time_poll.start

         @buffer.clear
         @rebalance_manager.clear

         loop do
+          time_poll.start
+
           # Don't fetch more messages if we do not have any time left
           break if time_poll.exceeded?
           # Don't fetch more messages if we've fetched max as we've wanted
@@ -69,7 +76,11 @@ module Karafka
           # If partition revocation happens, we need to remove messages from revoked partitions
           # as well as ensure we do not have duplicated due to the offset reset for partitions
           # that we got assigned
-
+          # We also do early break, so the information about rebalance is used as soon as possible
+          if @rebalance_manager.changed?
+            remove_revoked_and_duplicated_messages
+            break
+          end

           # Finally once we've (potentially) removed revoked, etc, if no messages were returned
           # we can break.
@@ -144,10 +155,14 @@ module Karafka

         internal_commit_offsets(async: false)

+        # Here we do not use our cached tpls because we should not try to pause something we do
+        # not own anymore.
         tpl = topic_partition_list(topic, partition)

         return unless tpl

+        @paused_tpls[topic][partition] = tpl
+
         @kafka.pause(tpl)

         @kafka.seek(pause_msg)
@@ -169,9 +184,13 @@ module Karafka
         # We can skip performance penalty since resuming should not happen too often
         internal_commit_offsets(async: false)

-        tpl = topic_partition_list(topic, partition)
+        # If we were not able, let's try to reuse the one we have (if we have)
+        tpl = topic_partition_list(topic, partition) || @paused_tpls[topic][partition]

         return unless tpl
+        # If we did not have it, it means we never paused this partition, thus no resume should
+        # happen in the first place
+        return unless @paused_tpls[topic].delete(partition)

         @kafka.resume(tpl)
       ensure
@@ -190,6 +209,7 @@ module Karafka
       # Marks given message as consumed.
       #
       # @param [Karafka::Messages::Message] message that we want to mark as processed
+      # @return [Boolean] true if successful. False if we no longer own given partition
       # @note This method won't trigger automatic offsets commits, rather relying on the offset
       # check-pointing trigger that happens with each batch processed
       def mark_as_consumed(message)
@@ -199,8 +219,10 @@ module Karafka
       # Marks a given message as consumed and commits the offsets in a blocking way.
       #
       # @param [Karafka::Messages::Message] message that we want to mark as processed
+      # @return [Boolean] true if successful. False if we no longer own given partition
       def mark_as_consumed!(message)
-        mark_as_consumed(message)
+        return false unless mark_as_consumed(message)
+
         commit_offsets!
       end

@@ -211,28 +233,42 @@ module Karafka
         @mutex.synchronize do
           @closed = false
           @offsetting = false
+          @paused_tpls.clear
           @kafka = build_consumer
         end
       end

       private

+      # When we cannot store an offset, it means we no longer own the partition
+      #
       # Non thread-safe offset storing method
       # @param message [Karafka::Messages::Message]
+      # @return [Boolean] true if we could store the offset (if we still own the partition)
       def internal_store_offset(message)
         @offsetting = true
         @kafka.store_offset(message)
+        true
+      rescue Rdkafka::RdkafkaError => e
+        return false if e.code == :assignment_lost
+        return false if e.code == :state
+
+        raise e
       end

       # Non thread-safe message committing method
       # @param async [Boolean] should the commit happen async or sync (async by default)
+      # @return [Boolean] true if offset commit worked, false if we've lost the assignment
       def internal_commit_offsets(async: true)
-        return unless @offsetting
+        return true unless @offsetting

         @kafka.commit(nil, async)
         @offsetting = false
+
+        true
       rescue Rdkafka::RdkafkaError => e
-        return if e.code == :no_offset
+        return false if e.code == :assignment_lost
+        return false if e.code == :no_offset

         raise e
       end
@@ -250,7 +286,8 @@ module Karafka

           @kafka.close
           @buffer.clear
-          @rebalance_manager.clear
+          # @note We do not clear rebalance manager here as we may still have revocation info here
+          # that we want to consider valid prior to running another reconnection
         end
       end

@@ -279,30 +316,39 @@ module Karafka

        time_poll.start

-        @kafka.poll(
+        @kafka.poll(timeout)
      rescue ::Rdkafka::RdkafkaError => e
-
-
-
+        # We return nil, so we do not restart until running the whole loop
+        # This allows us to run revocation jobs and other things and we will pick up new work
+        # next time after dispatching all the things that are needed
+        #
+        # If we would retry here, the client reset would become transparent and we would not have
+        # a chance to take any actions
        case e.code
        when :max_poll_exceeded # -147
          reset
+          return nil
        when :transport # -195
          reset
+          return nil
        when :rebalance_in_progress # -27
          reset
+          return nil
        when :not_coordinator # 16
          reset
+          return nil
        when :network_exception # 13
          reset
+          return nil
        end

-        time_poll.
-
+        raise if time_poll.attempts > MAX_POLL_RETRIES
        raise unless time_poll.retryable?

+        time_poll.checkpoint
        time_poll.backoff

+        # On unknown errors we do our best to retry and handle them before raising
        retry
      end

@@ -346,7 +392,7 @@ module Karafka
       # we are no longer responsible in a given process for processing those messages and they
       # should have been picked up by a different process.
       def remove_revoked_and_duplicated_messages
-        @rebalance_manager.revoked_partitions.each do |topic, partitions|
+        @rebalance_manager.lost_partitions.each do |topic, partitions|
           partitions.each do |partition|
             @buffer.delete(topic, partition)
           end
data/lib/karafka/connection/listener.rb CHANGED

@@ -18,15 +18,18 @@ module Karafka
       # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
       # @return [Karafka::Connection::Listener] listener instance
       def initialize(subscription_group, jobs_queue)
+        proc_config = ::Karafka::App.config.internal.processing
+
         @id = SecureRandom.uuid
         @subscription_group = subscription_group
         @jobs_queue = jobs_queue
-        @jobs_builder = ::Karafka::App.config.internal.jobs_builder
-        @pauses_manager = PausesManager.new
+        @coordinators = Processing::CoordinatorsBuffer.new
         @client = Client.new(@subscription_group)
         @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
+        @jobs_builder = proc_config.jobs_builder
+        @partitioner = proc_config.partitioner_class.new(subscription_group)
         # We reference scheduler here as it is much faster than fetching this each time
-        @scheduler = ::Karafka::App.config.internal.scheduler
+        @scheduler = proc_config.scheduler
         # We keep one buffer for messages to preserve memory and not allocate extra objects
         # We can do this that way because we always first schedule jobs using messages before we
         # fetch another batch.
@@ -86,6 +89,9 @@ module Karafka
         build_and_schedule_revoke_lost_partitions_jobs

         # We wait only on jobs from our subscription group. Other groups are independent.
+        # This will block on revoked jobs until they are finished. Those are not meant to last
+        # long and should not have any bigger impact on the system. Doing this in a blocking way
+        # simplifies the overall design and prevents from race conditions
         wait

         build_and_schedule_consumption_jobs
@@ -136,7 +142,7 @@ module Karafka

       # Resumes processing of partitions that were paused due to an error.
       def resume_paused_partitions
-        @pauses_manager.resume do |topic, partition|
+        @coordinators.resume do |topic, partition|
           @client.resume(topic, partition)
         end
       end
@@ -152,9 +158,21 @@ module Karafka

         revoked_partitions.each do |topic, partitions|
           partitions.each do |partition|
-
-
-
+            @coordinators.revoke(topic, partition)
+
+            # There may be a case where we have lost partition of which data we have never
+            # processed (if it was assigned and revoked really fast), thus we may not have it
+            # here. In cases like this, we do not run a revocation job
+            @executors.find_all(topic, partition).each do |executor|
+              jobs << @jobs_builder.revoked(executor)
+            end
+
+            # We need to remove all the executors of a given topic partition that we have lost, so
+            # next time we pick up it's work, new executors kick in. This may be needed especially
+            # for LRJ where we could end up with a race condition
+            # This revocation needs to happen after the jobs are scheduled, otherwise they would
+            # be scheduled with new executors instead of old
+            @executors.revoke(topic, partition)
           end
         end

@@ -191,11 +209,19 @@ module Karafka
         jobs = []

         @messages_buffer.each do |topic, partition, messages|
-
+          coordinator = @coordinators.find_or_create(topic, partition)
+
+          # Start work coordination for this topic partition
+          coordinator.start(messages)

-
+          @partitioner.call(topic, messages) do |group_id, partition_messages|
+            # Count the job we're going to create here
+            coordinator.increment

-
+            executor = @executors.find_or_create(topic, partition, group_id)
+
+            jobs << @jobs_builder.consume(executor, partition_messages, coordinator)
+          end
         end

         @scheduler.schedule_consumption(@jobs_queue, jobs)
@@ -231,7 +257,7 @@ module Karafka
         @jobs_queue.wait(@subscription_group.id)
         @jobs_queue.clear(@subscription_group.id)
         @client.reset
-        @pauses_manager = PausesManager.new
+        @coordinators.reset
         @executors = Processing::ExecutorsBuffer.new(@client, @subscription_group)
       end
     end
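
The consumption-job building above now routes each polled batch through a partitioner and counts the resulting jobs on a coordinator. A rough, self-contained sketch of that counting pattern (hypothetical `MiniCoordinator`, not Karafka's `Processing::Coordinator`):

```ruby
# Illustrative sketch: a coordinator counts the jobs created for one polled
# batch of a topic partition, so "finished" can be detected once every job
# reports back.
class MiniCoordinator
  def initialize
    @mutex = Mutex.new
    @running_jobs = 0
  end

  def increment
    @mutex.synchronize { @running_jobs += 1 }
  end

  def decrement
    @mutex.synchronize { @running_jobs -= 1 }
  end

  def finished?
    @mutex.synchronize { @running_jobs.zero? }
  end
end

# A trivial partitioner: everything lands in one group (group id 0).
# A virtual-partitions style partitioner would yield several groups here.
simple_partitioner = ->(messages, &block) { block.call(0, messages) }

coordinator = MiniCoordinator.new
jobs = []
messages = %w[m1 m2 m3]

simple_partitioner.call(messages) do |group_id, group_messages|
  coordinator.increment
  jobs << { group_id: group_id, messages: group_messages }
end

jobs.each { |_job| coordinator.decrement } # pretend each job finished
puts coordinator.finished? # => true
```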
data/lib/karafka/connection/rebalance_manager.rb CHANGED

@@ -18,13 +18,15 @@ module Karafka
       # Empty array for internal usage not to create new objects
       EMPTY_ARRAY = [].freeze

+      attr_reader :assigned_partitions, :revoked_partitions
+
       private_constant :EMPTY_ARRAY

       # @return [RebalanceManager]
       def initialize
         @assigned_partitions = {}
         @revoked_partitions = {}
-        @lost_partitions = {}
+        @changed = false
       end

       # Resets the rebalance manager state
@@ -33,26 +35,12 @@ module Karafka
       def clear
         @assigned_partitions.clear
         @revoked_partitions.clear
-        @lost_partitions.clear
-      end
-
-      # @return [Hash<String, Array<Integer>>] hash where the keys are the names of topics for
-      #   which we've lost partitions and array with ids of the partitions as the value
-      # @note We do not consider as lost topics and partitions that got revoked and assigned
-      def revoked_partitions
-        return @revoked_partitions if @revoked_partitions.empty?
-        return @lost_partitions unless @lost_partitions.empty?
-
-        @revoked_partitions.each do |topic, partitions|
-          @lost_partitions[topic] = partitions - @assigned_partitions.fetch(topic, EMPTY_ARRAY)
-        end
-
-        @lost_partitions
+        @changed = false
       end

-      # @return [Boolean]
-      def
-
+      # @return [Boolean] indicates a state change in the partitions assignment
+      def changed?
+        @changed
       end

       # Callback that kicks in inside of rdkafka, when new partitions are assigned.
@@ -62,6 +50,7 @@ module Karafka
       # @param partitions [Rdkafka::Consumer::TopicPartitionList]
       def on_partitions_assigned(_, partitions)
         @assigned_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
+        @changed = true
       end

       # Callback that kicks in inside of rdkafka, when partitions are revoked.
@@ -71,6 +60,18 @@ module Karafka
       # @param partitions [Rdkafka::Consumer::TopicPartitionList]
       def on_partitions_revoked(_, partitions)
         @revoked_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
+        @changed = true
+      end
+
+      # We consider as lost only partitions that were taken away and not re-assigned back to us
+      def lost_partitions
+        lost_partitions = {}
+
+        revoked_partitions.each do |topic, partitions|
+          lost_partitions[topic] = partitions - assigned_partitions.fetch(topic, EMPTY_ARRAY)
+        end
+
+        lost_partitions
       end
     end
   end
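
The new `lost_partitions` helper treats a partition as lost only when it was revoked and not re-assigned in the same rebalance. The computation is plain hash and array arithmetic, as in this sketch:

```ruby
# Illustrative sketch of the lost_partitions computation above.
revoked  = { 'events' => [0, 1, 2], 'logs' => [5] }
assigned = { 'events' => [1], 'payments' => [3] }

lost = {}
revoked.each do |topic, partitions|
  # Whatever was handed back in the same rebalance is not considered lost
  lost[topic] = partitions - assigned.fetch(topic, [])
end

puts lost.inspect # => {"events"=>[0, 2], "logs"=>[5]}
```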
data/lib/karafka/contracts/config.rb CHANGED

@@ -30,12 +30,26 @@ module Karafka

       # We validate internals just to be sure, that they are present and working
       required(:internal).schema do
-        required(:routing_builder)
-        required(:subscription_groups_builder)
-        required(:jobs_builder)
         required(:status)
         required(:process)
-
+
+        required(:routing).schema do
+          required(:builder)
+          required(:subscription_groups_builder)
+        end
+
+        required(:processing).schema do
+          required(:jobs_builder)
+          required(:scheduler)
+          required(:coordinator_class)
+          required(:partitioner_class)
+        end
+
+        required(:active_job).schema do
+          required(:dispatcher)
+          required(:job_options_contract)
+          required(:consumer_class)
+        end
       end
     end

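
The contract change above mirrors a reorganization of the internal settings into `routing`, `processing` and `active_job` namespaces. A plain-Ruby approximation of the structure being validated (illustrative only, not the dry-validation contract Karafka actually uses):

```ruby
# Sketch of the nested internal settings shape and a naive presence check.
internal = {
  status: :initializing,
  process: Object.new,
  routing: { builder: [], subscription_groups_builder: Object.new },
  processing: {
    jobs_builder: Object.new,
    scheduler: Object.new,
    coordinator_class: Class.new,
    partitioner_class: Class.new
  },
  active_job: {
    dispatcher: Object.new,
    job_options_contract: Object.new,
    consumer_class: Class.new
  }
}

required = {
  routing: %i[builder subscription_groups_builder],
  processing: %i[jobs_builder scheduler coordinator_class partitioner_class],
  active_job: %i[dispatcher job_options_contract consumer_class]
}

missing = required.flat_map do |namespace, keys|
  keys.reject { |key| internal.dig(namespace, key) }.map { |key| "#{namespace}.#{key}" }
end

puts missing.empty? ? 'internal config looks complete' : "missing: #{missing.join(', ')}"
```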
data/lib/karafka/contracts/server_cli_options.rb CHANGED

@@ -12,7 +12,7 @@ module Karafka
       # If there were no consumer_groups declared in the server cli, it means that we will
       # run all of them and no need to validate them here at all
       if !value.nil? &&
-         !(value - Karafka::App.config.internal.routing_builder.map(&:name)).empty?
+         !(value - Karafka::App.config.internal.routing.builder.map(&:name)).empty?
        key(:consumer_groups).failure(:consumer_groups_inclusion)
      end
    end
data/lib/karafka/errors.rb CHANGED

@@ -47,5 +47,8 @@ module Karafka
     # Used to instrument this error into the error notifications
     # We do not raise it so we won't crash deployed systems
     ExpiredLicenseTokenError = Class.new(BaseError)
+
+    # This should never happen. Please open an issue if it does.
+    InvalidCoordinatorState = Class.new(BaseError)
   end
 end
data/lib/karafka/instrumentation/logger_listener.rb CHANGED

@@ -98,9 +98,6 @@ module Karafka
       details = (error.backtrace || []).join("\n")

       case type
-      when 'consumer.prepared.error'
-        error "Consumer prepared error: #{error}"
-        error details
       when 'consumer.consume.error'
         error "Consumer consuming error: #{error}"
         error details
data/lib/karafka/pro/active_job/consumer.rb CHANGED

@@ -20,26 +20,20 @@ module Karafka
       #
       # It contains slightly better revocation warranties than the regular blocking consumer as
       # it can stop processing batch of jobs in the middle after the revocation.
-      class Consumer < Karafka::
+      class Consumer < Karafka::Pro::BaseConsumer
         # Runs ActiveJob jobs processing and handles lrj if needed
         def consume
           messages.each do |message|
             # If for any reason we've lost this partition, not worth iterating over new messages
             # as they are no longer ours
-
+            break if revoked?
             break if Karafka::App.stopping?

             ::ActiveJob::Base.execute(
               ::ActiveSupport::JSON.decode(message.raw_payload)
             )

-            # We check it twice as the job may be long running
-            return if revoked?
-
             mark_as_consumed(message)
-
-            # Do not process more if we are shutting down
-            break if Karafka::App.stopping?
           end
         end
       end
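
With the pause and seek handling moved into `Karafka::Pro::BaseConsumer`, the ActiveJob consumer above only needs to bail out early on revocation or shutdown. A toy version of that loop (hypothetical helpers, not Karafka's API):

```ruby
# Sketch of the simplified consume loop: stop as soon as ownership or the
# process state changes, otherwise process and mark each message.
def process_batch(messages, revoked:, stopping:)
  processed = []

  messages.each do |message|
    break if revoked.call
    break if stopping.call

    processed << message.upcase # stand-in for ::ActiveJob::Base.execute
  end

  processed
end

puts process_batch(%w[a b c], revoked: -> { false }, stopping: -> { false }).inspect
# => ["A", "B", "C"]
puts process_batch(%w[a b c], revoked: -> { true }, stopping: -> { false }).inspect
# => []
```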
data/lib/karafka/pro/base_consumer.rb ADDED

@@ -0,0 +1,82 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component.
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    # Karafka PRO consumer.
+    #
+    # If you use PRO, all your consumers should inherit (indirectly) from it.
+    #
+    # @note In case of using lrj, manual pausing may not be the best idea as resume needs to happen
+    #   after each batch is processed.
+    class BaseConsumer < Karafka::BaseConsumer
+      # Pause for tops 31 years
+      MAX_PAUSE_TIME = 1_000_000_000_000
+
+      private_constant :MAX_PAUSE_TIME
+
+      # Pauses processing of a given partition until we're done with the processing
+      # This ensures, that we can easily poll not reaching the `max.poll.interval`
+      def on_before_consume
+        return unless topic.long_running_job?
+
+        # This ensures, that when running LRJ with VP, things operate as expected
+        coordinator.on_started do |first_group_message|
+          # Pause at the first message in a batch. That way in case of a crash, we will not loose
+          # any messages
+          pause(first_group_message.offset, MAX_PAUSE_TIME)
+        end
+      end
+
+      # Runs extra logic after consumption that is related to handling long running jobs
+      # @note This overwrites the '#on_after_consume' from the base consumer
+      def on_after_consume
+        coordinator.on_finished do |first_group_message, last_group_message|
+          on_after_consume_regular(first_group_message, last_group_message)
+        end
+      end
+
+      private
+
+      # Handles the post-consumption flow depending on topic settings
+      #
+      # @param first_message [Karafka::Messages::Message]
+      # @param last_message [Karafka::Messages::Message]
+      def on_after_consume_regular(first_message, last_message)
+        if coordinator.success?
+          coordinator.pause_tracker.reset
+
+          # We use the non-blocking one here. If someone needs the blocking one, can implement it
+          # with manual offset management
+          # Mark as consumed only if manual offset management is not on
+          mark_as_consumed(last_message) unless topic.manual_offset_management? || revoked?
+
+          # If this is not a long running job there is nothing for us to do here
+          return unless topic.long_running_job?
+
+          # Once processing is done, we move to the new offset based on commits
+          # Here, in case manual offset management is off, we have the new proper offset of a
+          # first message from another batch from `@seek_offset`. If manual offset management
+          # is on, we move to place where the user indicated it was finished. This can create an
+          # interesting (yet valid) corner case, where with manual offset management on and no
+          # marking as consumed, we end up with an infinite loop processing same messages over and
+          # over again
+          seek(@seek_offset || first_message.offset)
+
+          resume
+        else
+          # If processing failed, we need to pause
+          pause(@seek_offset || first_message.offset)
+        end
+      end
+    end
+  end
+end
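
The flow implemented by this new consumer can be summarized as: pause at the first offset before work starts, then on success seek to the next offset and resume, and on failure stay paused at the retry offset. A simplified sketch of that sequencing (assumed names, not the actual Karafka classes):

```ruby
# Sketch of the long-running-job pause/seek/resume ordering described above.
class LrjFlow
  attr_reader :actions

  def initialize
    @actions = []
  end

  # Before consumption: pause at the first offset so a crash loses nothing
  def before_consume(first_offset)
    @actions << [:pause, first_offset]
  end

  # After consumption: seek + resume on success, stay paused on failure
  def after_consume(success:, first_offset:, next_offset:)
    if success
      @actions << [:seek, next_offset || first_offset]
      @actions << [:resume]
    else
      @actions << [:pause, first_offset]
    end
  end
end

flow = LrjFlow.new
flow.before_consume(100)
flow.after_consume(success: true, first_offset: 100, next_offset: 103)
puts flow.actions.inspect
# => [[:pause, 100], [:seek, 103], [:resume]]
```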
data/lib/karafka/pro/loader.rb CHANGED

@@ -15,11 +15,13 @@ module Karafka
     class Loader
       # All the pro components that need to be loaded
       COMPONENTS = %w[
+        base_consumer
         performance_tracker
-        scheduler
-        base_consumer_extensions
+        processing/scheduler
         processing/jobs/consume_non_blocking
         processing/jobs_builder
+        processing/coordinator
+        processing/partitioner
         routing/extensions
         active_job/consumer
         active_job/dispatcher
@@ -35,14 +37,18 @@ module Karafka
       def setup(config)
         COMPONENTS.each { |component| require_relative(component) }

-
-
-
-
-
+        icfg = config.internal
+
+        icfg.processing.coordinator_class = Processing::Coordinator
+        icfg.processing.partitioner_class = Processing::Partitioner
+        icfg.processing.scheduler = Processing::Scheduler.new
+        icfg.processing.jobs_builder = Processing::JobsBuilder.new
+
+        icfg.active_job.consumer_class = ActiveJob::Consumer
+        icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
+        icfg.active_job.job_options_contract = ActiveJob::JobOptionsContract.new

         ::Karafka::Routing::Topic.include(Routing::Extensions)
-        ::Karafka::BaseConsumer.prepend(BaseConsumerExtensions)

         config.monitor.subscribe(PerformanceTracker.instance)
       end
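
The loader now injects the Pro implementations purely by reassigning entries on the internal settings, so the rest of the framework keeps reading the same keys. A minimal sketch of that swap-by-configuration idea, using a Struct as a stand-in for the config object:

```ruby
# Sketch only: swapping an implementation by overwriting a config entry.
ProcessingConfig = Struct.new(:coordinator_class, :partitioner_class, :scheduler, :jobs_builder)

DefaultScheduler = Class.new
ProScheduler = Class.new

config = ProcessingConfig.new(nil, nil, DefaultScheduler.new, nil)

# What a "pro loader" style setup does: overwrite the default in place
config.scheduler = ProScheduler.new

puts config.scheduler.class # => ProScheduler
```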
data/lib/karafka/pro/processing/coordinator.rb ADDED

@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Pro
+    module Processing
+      # Pro coordinator that provides extra orchestration methods useful for parallel processing
+      # within the same partition
+      class Coordinator < ::Karafka::Processing::Coordinator
+        # @param args [Object] anything the base coordinator accepts
+        def initialize(*args)
+          super
+          @on_started_invoked = false
+          @on_finished_invoked = false
+          @flow_lock = Mutex.new
+        end
+
+        # Starts the coordination process
+        # @param messages [Array<Karafka::Messages::Message>] messages for which processing we are
+        #   going to coordinate.
+        def start(messages)
+          super
+
+          @mutex.synchronize do
+            @on_started_invoked = false
+            @on_finished_invoked = false
+            @first_message = messages.first
+            @last_message = messages.last
+          end
+        end
+
+        # @return [Boolean] is the coordinated work finished or not
+        def finished?
+          @running_jobs.zero?
+        end
+
+        # Runs given code only once per all the coordinated jobs upon starting first of them
+        def on_started
+          @flow_lock.synchronize do
+            return if @on_started_invoked
+
+            @on_started_invoked = true
+
+            yield(@first_message, @last_message)
+          end
+        end
+
+        # Runs once when all the work that is suppose to be coordinated is finished
+        # It runs once per all the coordinated jobs and should be used to run any type of post
+        # jobs coordination processing execution
+        def on_finished
+          @flow_lock.synchronize do
+            return unless finished?
+            return if @on_finished_invoked
+
+            @on_finished_invoked = true
+
+            yield(@first_message, @last_message)
+          end
+        end
+      end
+    end
+  end
+end
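
The `on_started`/`on_finished` hooks above are essentially run-once guards shared by all jobs of a batch. A self-contained sketch of that guard (hypothetical `OnceGuard`, not the Karafka class):

```ruby
# Sketch of a "run once per coordinated batch" guard: many worker threads may
# call the hook, but only the first call runs the block.
class OnceGuard
  def initialize
    @lock = Mutex.new
    @invoked = false
  end

  def call
    @lock.synchronize do
      next if @invoked

      @invoked = true
      yield
    end
  end
end

counter = 0
guard = OnceGuard.new

threads = 5.times.map do
  Thread.new { guard.call { counter += 1 } }
end
threads.each(&:join)

puts counter # => 1
```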
data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb CHANGED

@@ -26,7 +26,7 @@ module Karafka
         # management. This layer of the framework knows nothing about Kafka messages consumption.
         class ConsumeNonBlocking < ::Karafka::Processing::Jobs::Consume
           # Releases the blocking lock after it is done with the preparation phase for this job
-          def
+          def before_call
             super
             @non_blocking = true
           end