karafka 2.0.0.beta3 → 2.0.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +18 -15
- data/CHANGELOG.md +37 -0
- data/CONTRIBUTING.md +0 -5
- data/Gemfile.lock +6 -6
- data/README.md +2 -10
- data/bin/benchmarks +2 -2
- data/bin/integrations +10 -3
- data/bin/{stress → stress_many} +1 -1
- data/bin/stress_one +13 -0
- data/bin/wait_for_kafka +20 -0
- data/docker-compose.yml +32 -13
- data/karafka.gemspec +1 -1
- data/lib/karafka/active_job/routing/extensions.rb +1 -1
- data/lib/karafka/app.rb +2 -1
- data/lib/karafka/base_consumer.rb +59 -46
- data/lib/karafka/connection/client.rb +60 -14
- data/lib/karafka/connection/listener.rb +37 -11
- data/lib/karafka/connection/rebalance_manager.rb +20 -19
- data/lib/karafka/contracts/config.rb +18 -4
- data/lib/karafka/contracts/server_cli_options.rb +1 -1
- data/lib/karafka/errors.rb +3 -0
- data/lib/karafka/instrumentation/logger_listener.rb +0 -3
- data/lib/karafka/instrumentation/monitor.rb +0 -1
- data/lib/karafka/pro/active_job/consumer.rb +2 -8
- data/lib/karafka/pro/base_consumer.rb +82 -0
- data/lib/karafka/pro/loader.rb +14 -8
- data/lib/karafka/pro/processing/coordinator.rb +63 -0
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +1 -1
- data/lib/karafka/pro/processing/jobs_builder.rb +3 -2
- data/lib/karafka/pro/processing/partitioner.rb +41 -0
- data/lib/karafka/pro/processing/scheduler.rb +56 -0
- data/lib/karafka/pro/routing/extensions.rb +6 -0
- data/lib/karafka/processing/coordinator.rb +88 -0
- data/lib/karafka/processing/coordinators_buffer.rb +54 -0
- data/lib/karafka/processing/executor.rb +16 -9
- data/lib/karafka/processing/executors_buffer.rb +46 -15
- data/lib/karafka/processing/jobs/base.rb +8 -3
- data/lib/karafka/processing/jobs/consume.rb +11 -4
- data/lib/karafka/processing/jobs_builder.rb +3 -2
- data/lib/karafka/processing/partitioner.rb +22 -0
- data/lib/karafka/processing/result.rb +29 -0
- data/lib/karafka/processing/scheduler.rb +22 -0
- data/lib/karafka/processing/worker.rb +2 -2
- data/lib/karafka/routing/consumer_group.rb +1 -1
- data/lib/karafka/routing/topic.rb +14 -0
- data/lib/karafka/setup/config.rb +20 -10
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +16 -8
- metadata.gz.sig +0 -0
- data/lib/karafka/pro/base_consumer_extensions.rb +0 -66
- data/lib/karafka/pro/scheduler.rb +0 -54
- data/lib/karafka/scheduler.rb +0 -20
data/lib/karafka/connection/client.rb
CHANGED
```diff
@@ -36,6 +36,12 @@ module Karafka
       # Marks if we need to offset. If we did not store offsets, we should not commit the offset
       # position as it will crash rdkafka
       @offsetting = false
+      # We need to keep track of what we have paused for resuming
+      # In case we loose partition, we still need to resume it, otherwise it won't be fetched
+      # again if we get reassigned to it later on. We need to keep them as after revocation we
+      # no longer may be able to fetch them from Kafka. We could build them but it is easier
+      # to just keep them here and use if needed when cannot be obtained
+      @paused_tpls = Hash.new { |h, k| h[k] = {} }
     end

     # Fetches messages within boundaries defined by the settings (time, size, topics, etc).
@@ -45,12 +51,13 @@ module Karafka
     # @note This method should not be executed from many threads at the same time
     def batch_poll
       time_poll = TimeTrackers::Poll.new(@subscription_group.max_wait_time)
-      time_poll.start

       @buffer.clear
       @rebalance_manager.clear

       loop do
+        time_poll.start
+
         # Don't fetch more messages if we do not have any time left
         break if time_poll.exceeded?
         # Don't fetch more messages if we've fetched max as we've wanted
@@ -69,7 +76,11 @@ module Karafka
         # If partition revocation happens, we need to remove messages from revoked partitions
         # as well as ensure we do not have duplicated due to the offset reset for partitions
         # that we got assigned
-
+        # We also do early break, so the information about rebalance is used as soon as possible
+        if @rebalance_manager.changed?
+          remove_revoked_and_duplicated_messages
+          break
+        end

         # Finally once we've (potentially) removed revoked, etc, if no messages were returned
         # we can break.
@@ -144,10 +155,14 @@ module Karafka

       internal_commit_offsets(async: false)

+      # Here we do not use our cached tpls because we should not try to pause something we do
+      # not own anymore.
       tpl = topic_partition_list(topic, partition)

       return unless tpl

+      @paused_tpls[topic][partition] = tpl
+
       @kafka.pause(tpl)

       @kafka.seek(pause_msg)
@@ -169,9 +184,13 @@ module Karafka
       # We can skip performance penalty since resuming should not happen too often
       internal_commit_offsets(async: false)

-
+      # If we were not able, let's try to reuse the one we have (if we have)
+      tpl = topic_partition_list(topic, partition) || @paused_tpls[topic][partition]

       return unless tpl
+      # If we did not have it, it means we never paused this partition, thus no resume should
+      # happen in the first place
+      return unless @paused_tpls[topic].delete(partition)

       @kafka.resume(tpl)
     ensure
@@ -190,6 +209,7 @@ module Karafka
     # Marks given message as consumed.
     #
     # @param [Karafka::Messages::Message] message that we want to mark as processed
+    # @return [Boolean] true if successful. False if we no longer own given partition
     # @note This method won't trigger automatic offsets commits, rather relying on the offset
     # check-pointing trigger that happens with each batch processed
     def mark_as_consumed(message)
@@ -199,8 +219,10 @@ module Karafka
     # Marks a given message as consumed and commits the offsets in a blocking way.
     #
     # @param [Karafka::Messages::Message] message that we want to mark as processed
+    # @return [Boolean] true if successful. False if we no longer own given partition
     def mark_as_consumed!(message)
-      mark_as_consumed(message)
+      return false unless mark_as_consumed(message)
+
       commit_offsets!
     end

@@ -211,28 +233,42 @@ module Karafka
      @mutex.synchronize do
        @closed = false
        @offsetting = false
+       @paused_tpls.clear
        @kafka = build_consumer
      end
    end

    private

+   # When we cannot store an offset, it means we no longer own the partition
+   #
    # Non thread-safe offset storing method
    # @param message [Karafka::Messages::Message]
+   # @return [Boolean] true if we could store the offset (if we still own the partition)
    def internal_store_offset(message)
      @offsetting = true
      @kafka.store_offset(message)
+     true
+   rescue Rdkafka::RdkafkaError => e
+     return false if e.code == :assignment_lost
+     return false if e.code == :state
+
+     raise e
    end

    # Non thread-safe message committing method
    # @param async [Boolean] should the commit happen async or sync (async by default)
+   # @return [Boolean] true if offset commit worked, false if we've lost the assignment
    def internal_commit_offsets(async: true)
-     return unless @offsetting
+     return true unless @offsetting

      @kafka.commit(nil, async)
      @offsetting = false
+
+     true
    rescue Rdkafka::RdkafkaError => e
-     return if e.code == :
+     return false if e.code == :assignment_lost
+     return false if e.code == :no_offset

      raise e
    end
@@ -250,7 +286,8 @@ module Karafka

      @kafka.close
      @buffer.clear
-     @
+     # @note We do not clear rebalance manager here as we may still have revocation info here
+     # that we want to consider valid prior to running another reconnection
    end
  end

@@ -279,30 +316,39 @@ module Karafka

    time_poll.start

-   @kafka.poll(
+   @kafka.poll(timeout)
  rescue ::Rdkafka::RdkafkaError => e
-
-
-
+   # We return nil, so we do not restart until running the whole loop
+   # This allows us to run revocation jobs and other things and we will pick up new work
+   # next time after dispatching all the things that are needed
+   #
+   # If we would retry here, the client reset would become transparent and we would not have
+   # a chance to take any actions
    case e.code
    when :max_poll_exceeded # -147
      reset
+     return nil
    when :transport # -195
      reset
+     return nil
    when :rebalance_in_progress # -27
      reset
+     return nil
    when :not_coordinator # 16
      reset
+     return nil
    when :network_exception # 13
      reset
+     return nil
    end

-   time_poll.
-
+   raise if time_poll.attempts > MAX_POLL_RETRIES
    raise unless time_poll.retryable?

+   time_poll.checkpoint
    time_poll.backoff

+   # On unknown errors we do our best to retry and handle them before raising
    retry
  end
@@ -346,7 +392,7 @@ module Karafka
    # we are no longer responsible in a given process for processing those messages and they
    # should have been picked up by a different process.
    def remove_revoked_and_duplicated_messages
-     @rebalance_manager.
+     @rebalance_manager.lost_partitions.each do |topic, partitions|
        partitions.each do |partition|
          @buffer.delete(topic, partition)
        end
```
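The practical upshot of the client changes above is that offset storing, committing and marking now return a boolean instead of silently swallowing a lost assignment. A minimal, hypothetical sketch of how application code could take advantage of that — it assumes the consumer-level `mark_as_consumed` exposes the same true/false result that the client-level methods document; the class and helper names are illustrative:

```ruby
# Illustrative consumer - not part of this release's code
class EventsConsumer < Karafka::BaseConsumer
  def consume
    messages.each do |message|
      handle(message)

      # false indicates the partition assignment was lost, so we stop early instead of
      # processing and re-marking messages that now belong to another process
      break unless mark_as_consumed(message)
    end
  end

  private

  # Placeholder for real application work
  def handle(message)
    Karafka.logger.info("Handled message at offset #{message.offset}")
  end
end
```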
data/lib/karafka/connection/listener.rb
CHANGED
```diff
@@ -18,15 +18,18 @@ module Karafka
      # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
      # @return [Karafka::Connection::Listener] listener instance
      def initialize(subscription_group, jobs_queue)
+       proc_config = ::Karafka::App.config.internal.processing
+
        @id = SecureRandom.uuid
        @subscription_group = subscription_group
        @jobs_queue = jobs_queue
-       @
-       @pauses_manager = PausesManager.new
+       @coordinators = Processing::CoordinatorsBuffer.new
        @client = Client.new(@subscription_group)
        @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
+       @jobs_builder = proc_config.jobs_builder
+       @partitioner = proc_config.partitioner_class.new(subscription_group)
        # We reference scheduler here as it is much faster than fetching this each time
-       @scheduler =
+       @scheduler = proc_config.scheduler
        # We keep one buffer for messages to preserve memory and not allocate extra objects
        # We can do this that way because we always first schedule jobs using messages before we
        # fetch another batch.
@@ -86,6 +89,9 @@ module Karafka
          build_and_schedule_revoke_lost_partitions_jobs

          # We wait only on jobs from our subscription group. Other groups are independent.
+         # This will block on revoked jobs until they are finished. Those are not meant to last
+         # long and should not have any bigger impact on the system. Doing this in a blocking way
+         # simplifies the overall design and prevents from race conditions
          wait

          build_and_schedule_consumption_jobs
@@ -136,7 +142,7 @@ module Karafka

      # Resumes processing of partitions that were paused due to an error.
      def resume_paused_partitions
-       @
+       @coordinators.resume do |topic, partition|
          @client.resume(topic, partition)
        end
      end
@@ -152,9 +158,21 @@ module Karafka

        revoked_partitions.each do |topic, partitions|
          partitions.each do |partition|
-
-
-
+           @coordinators.revoke(topic, partition)
+
+           # There may be a case where we have lost partition of which data we have never
+           # processed (if it was assigned and revoked really fast), thus we may not have it
+           # here. In cases like this, we do not run a revocation job
+           @executors.find_all(topic, partition).each do |executor|
+             jobs << @jobs_builder.revoked(executor)
+           end
+
+           # We need to remove all the executors of a given topic partition that we have lost, so
+           # next time we pick up it's work, new executors kick in. This may be needed especially
+           # for LRJ where we could end up with a race condition
+           # This revocation needs to happen after the jobs are scheduled, otherwise they would
+           # be scheduled with new executors instead of old
+           @executors.revoke(topic, partition)
          end
        end

@@ -191,11 +209,19 @@ module Karafka
        jobs = []

        @messages_buffer.each do |topic, partition, messages|
-
+         coordinator = @coordinators.find_or_create(topic, partition)
+
+         # Start work coordination for this topic partition
+         coordinator.start(messages)

-
+         @partitioner.call(topic, messages) do |group_id, partition_messages|
+           # Count the job we're going to create here
+           coordinator.increment

-
+           executor = @executors.find_or_create(topic, partition, group_id)
+
+           jobs << @jobs_builder.consume(executor, partition_messages, coordinator)
+         end
        end

        @scheduler.schedule_consumption(@jobs_queue, jobs)
@@ -231,7 +257,7 @@ module Karafka
        @jobs_queue.wait(@subscription_group.id)
        @jobs_queue.clear(@subscription_group.id)
        @client.reset
-       @
+       @coordinators.reset
        @executors = Processing::ExecutorsBuffer.new(@client, @subscription_group)
      end
    end
```
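The listener now asks a configurable partitioner to split each partition's batch into one or more groups and builds one consumption job per yielded group, each counted against a shared coordinator. A rough sketch of that contract, inferred from the calls above (the default non-Pro partitioner presumably yields the whole batch as a single group; the class below is hypothetical):

```ruby
# Hypothetical partitioner honoring the interface used by build_and_schedule_consumption_jobs:
# yield(group_id, messages) once per group; the listener pairs each group with an executor.
class SingleGroupPartitioner
  # @param subscription_group [Karafka::Routing::SubscriptionGroup]
  def initialize(subscription_group)
    @subscription_group = subscription_group
  end

  # @param _topic [String] topic name
  # @param messages [Array<Karafka::Messages::Message>] messages from one topic partition
  def call(_topic, messages)
    # No virtual partitioning: everything stays in one group with id 0
    yield(0, messages)
  end
end
```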
data/lib/karafka/connection/rebalance_manager.rb
CHANGED
```diff
@@ -18,13 +18,15 @@ module Karafka
      # Empty array for internal usage not to create new objects
      EMPTY_ARRAY = [].freeze

+     attr_reader :assigned_partitions, :revoked_partitions
+
      private_constant :EMPTY_ARRAY

      # @return [RebalanceManager]
      def initialize
        @assigned_partitions = {}
        @revoked_partitions = {}
-       @
+       @changed = false
      end

      # Resets the rebalance manager state
@@ -33,26 +35,12 @@ module Karafka
      def clear
        @assigned_partitions.clear
        @revoked_partitions.clear
-       @
-     end
-
-     # @return [Hash<String, Array<Integer>>] hash where the keys are the names of topics for
-     # which we've lost partitions and array with ids of the partitions as the value
-     # @note We do not consider as lost topics and partitions that got revoked and assigned
-     def revoked_partitions
-       return @revoked_partitions if @revoked_partitions.empty?
-       return @lost_partitions unless @lost_partitions.empty?
-
-       @revoked_partitions.each do |topic, partitions|
-         @lost_partitions[topic] = partitions - @assigned_partitions.fetch(topic, EMPTY_ARRAY)
-       end
-
-       @lost_partitions
+       @changed = false
      end

-     # @return [Boolean]
-     def
-
+     # @return [Boolean] indicates a state change in the partitions assignment
+     def changed?
+       @changed
      end

      # Callback that kicks in inside of rdkafka, when new partitions are assigned.
@@ -62,6 +50,7 @@ module Karafka
      # @param partitions [Rdkafka::Consumer::TopicPartitionList]
      def on_partitions_assigned(_, partitions)
        @assigned_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
+       @changed = true
      end

      # Callback that kicks in inside of rdkafka, when partitions are revoked.
@@ -71,6 +60,18 @@ module Karafka
      # @param partitions [Rdkafka::Consumer::TopicPartitionList]
      def on_partitions_revoked(_, partitions)
        @revoked_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
+       @changed = true
+     end
+
+     # We consider as lost only partitions that were taken away and not re-assigned back to us
+     def lost_partitions
+       lost_partitions = {}
+
+       revoked_partitions.each do |topic, partitions|
+         lost_partitions[topic] = partitions - assigned_partitions.fetch(topic, EMPTY_ARRAY)
+       end
+
+       lost_partitions
      end
    end
  end
```
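The `lost_partitions` method above reduces to per-topic set arithmetic: a partition counts as lost only when it was revoked and not re-assigned back within the same rebalance. A standalone illustration with made-up data:

```ruby
# Hash shapes mirror what the rebalance manager builds from the rdkafka callbacks
revoked_partitions  = { 'events' => [0, 1, 2] }
assigned_partitions = { 'events' => [2] }

lost_partitions = {}

revoked_partitions.each do |topic, partitions|
  lost_partitions[topic] = partitions - assigned_partitions.fetch(topic, [])
end

lost_partitions # => { "events" => [0, 1] }
```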
data/lib/karafka/contracts/config.rb
CHANGED
```diff
@@ -30,12 +30,26 @@ module Karafka

      # We validate internals just to be sure, that they are present and working
      required(:internal).schema do
-       required(:routing_builder)
-       required(:subscription_groups_builder)
-       required(:jobs_builder)
        required(:status)
        required(:process)
-
+
+       required(:routing).schema do
+         required(:builder)
+         required(:subscription_groups_builder)
+       end
+
+       required(:processing).schema do
+         required(:jobs_builder)
+         required(:scheduler)
+         required(:coordinator_class)
+         required(:partitioner_class)
+       end
+
+       required(:active_job).schema do
+         required(:dispatcher)
+         required(:job_options_contract)
+         required(:consumer_class)
+       end
      end
    end

```
data/lib/karafka/contracts/server_cli_options.rb
CHANGED
```diff
@@ -12,7 +12,7 @@ module Karafka
      # If there were no consumer_groups declared in the server cli, it means that we will
      # run all of them and no need to validate them here at all
      if !value.nil? &&
-        !(value - Karafka::App.config.internal.
+        !(value - Karafka::App.config.internal.routing.builder.map(&:name)).empty?
        key(:consumer_groups).failure(:consumer_groups_inclusion)
      end
    end
```
data/lib/karafka/errors.rb
CHANGED
```diff
@@ -47,5 +47,8 @@ module Karafka
    # Used to instrument this error into the error notifications
    # We do not raise it so we won't crash deployed systems
    ExpiredLicenseTokenError = Class.new(BaseError)
+
+   # This should never happen. Please open an issue if it does.
+   InvalidCoordinatorState = Class.new(BaseError)
  end
end
```
data/lib/karafka/instrumentation/logger_listener.rb
CHANGED
```diff
@@ -98,9 +98,6 @@ module Karafka
      details = (error.backtrace || []).join("\n")

      case type
-     when 'consumer.prepared.error'
-       error "Consumer prepared error: #{error}"
-       error details
      when 'consumer.consume.error'
        error "Consumer consuming error: #{error}"
        error details
```
data/lib/karafka/pro/active_job/consumer.rb
CHANGED
```diff
@@ -20,26 +20,20 @@ module Karafka
      #
      # It contains slightly better revocation warranties than the regular blocking consumer as
      # it can stop processing batch of jobs in the middle after the revocation.
-     class Consumer < Karafka::
+     class Consumer < Karafka::Pro::BaseConsumer
        # Runs ActiveJob jobs processing and handles lrj if needed
        def consume
          messages.each do |message|
            # If for any reason we've lost this partition, not worth iterating over new messages
            # as they are no longer ours
-
+           break if revoked?
            break if Karafka::App.stopping?

            ::ActiveJob::Base.execute(
              ::ActiveSupport::JSON.decode(message.raw_payload)
            )

-           # We check it twice as the job may be long running
-           return if revoked?
-
            mark_as_consumed(message)
-
-           # Do not process more if we are shutting down
-           break if Karafka::App.stopping?
          end
        end
      end
```
data/lib/karafka/pro/base_consumer.rb
ADDED
```diff
@@ -0,0 +1,82 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component.
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    # Karafka PRO consumer.
+    #
+    # If you use PRO, all your consumers should inherit (indirectly) from it.
+    #
+    # @note In case of using lrj, manual pausing may not be the best idea as resume needs to happen
+    # after each batch is processed.
+    class BaseConsumer < Karafka::BaseConsumer
+      # Pause for tops 31 years
+      MAX_PAUSE_TIME = 1_000_000_000_000
+
+      private_constant :MAX_PAUSE_TIME
+
+      # Pauses processing of a given partition until we're done with the processing
+      # This ensures, that we can easily poll not reaching the `max.poll.interval`
+      def on_before_consume
+        return unless topic.long_running_job?
+
+        # This ensures, that when running LRJ with VP, things operate as expected
+        coordinator.on_started do |first_group_message|
+          # Pause at the first message in a batch. That way in case of a crash, we will not loose
+          # any messages
+          pause(first_group_message.offset, MAX_PAUSE_TIME)
+        end
+      end
+
+      # Runs extra logic after consumption that is related to handling long running jobs
+      # @note This overwrites the '#on_after_consume' from the base consumer
+      def on_after_consume
+        coordinator.on_finished do |first_group_message, last_group_message|
+          on_after_consume_regular(first_group_message, last_group_message)
+        end
+      end
+
+      private
+
+      # Handles the post-consumption flow depending on topic settings
+      #
+      # @param first_message [Karafka::Messages::Message]
+      # @param last_message [Karafka::Messages::Message]
+      def on_after_consume_regular(first_message, last_message)
+        if coordinator.success?
+          coordinator.pause_tracker.reset
+
+          # We use the non-blocking one here. If someone needs the blocking one, can implement it
+          # with manual offset management
+          # Mark as consumed only if manual offset management is not on
+          mark_as_consumed(last_message) unless topic.manual_offset_management? || revoked?
+
+          # If this is not a long running job there is nothing for us to do here
+          return unless topic.long_running_job?
+
+          # Once processing is done, we move to the new offset based on commits
+          # Here, in case manual offset management is off, we have the new proper offset of a
+          # first message from another batch from `@seek_offset`. If manual offset management
+          # is on, we move to place where the user indicated it was finished. This can create an
+          # interesting (yet valid) corner case, where with manual offset management on and no
+          # marking as consumed, we end up with an infinite loop processing same messages over and
+          # over again
+          seek(@seek_offset || first_message.offset)
+
+          resume
+        else
+          # If processing failed, we need to pause
+          pause(@seek_offset || first_message.offset)
+        end
+      end
+    end
+  end
+end
```
data/lib/karafka/pro/loader.rb
CHANGED
```diff
@@ -15,11 +15,13 @@ module Karafka
    class Loader
      # All the pro components that need to be loaded
      COMPONENTS = %w[
+       base_consumer
        performance_tracker
-       scheduler
-       base_consumer_extensions
+       processing/scheduler
        processing/jobs/consume_non_blocking
        processing/jobs_builder
+       processing/coordinator
+       processing/partitioner
        routing/extensions
        active_job/consumer
        active_job/dispatcher
@@ -35,14 +37,18 @@ module Karafka
      def setup(config)
        COMPONENTS.each { |component| require_relative(component) }

-
-
-
-
-
+       icfg = config.internal
+
+       icfg.processing.coordinator_class = Processing::Coordinator
+       icfg.processing.partitioner_class = Processing::Partitioner
+       icfg.processing.scheduler = Processing::Scheduler.new
+       icfg.processing.jobs_builder = Processing::JobsBuilder.new
+
+       icfg.active_job.consumer_class = ActiveJob::Consumer
+       icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
+       icfg.active_job.job_options_contract = ActiveJob::JobOptionsContract.new

        ::Karafka::Routing::Topic.include(Routing::Extensions)
-       ::Karafka::BaseConsumer.prepend(BaseConsumerExtensions)

        config.monitor.subscribe(PerformanceTracker.instance)
      end
```
data/lib/karafka/pro/processing/coordinator.rb
ADDED
```diff
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Pro
+    module Processing
+      # Pro coordinator that provides extra orchestration methods useful for parallel processing
+      # within the same partition
+      class Coordinator < ::Karafka::Processing::Coordinator
+        # @param args [Object] anything the base coordinator accepts
+        def initialize(*args)
+          super
+          @on_started_invoked = false
+          @on_finished_invoked = false
+          @flow_lock = Mutex.new
+        end
+
+        # Starts the coordination process
+        # @param messages [Array<Karafka::Messages::Message>] messages for which processing we are
+        # going to coordinate.
+        def start(messages)
+          super
+
+          @mutex.synchronize do
+            @on_started_invoked = false
+            @on_finished_invoked = false
+            @first_message = messages.first
+            @last_message = messages.last
+          end
+        end
+
+        # @return [Boolean] is the coordinated work finished or not
+        def finished?
+          @running_jobs.zero?
+        end
+
+        # Runs given code only once per all the coordinated jobs upon starting first of them
+        def on_started
+          @flow_lock.synchronize do
+            return if @on_started_invoked
+
+            @on_started_invoked = true
+
+            yield(@first_message, @last_message)
+          end
+        end
+
+        # Runs once when all the work that is suppose to be coordinated is finished
+        # It runs once per all the coordinated jobs and should be used to run any type of post
+        # jobs coordination processing execution
+        def on_finished
+          @flow_lock.synchronize do
+            return unless finished?
+            return if @on_finished_invoked
+
+            @on_finished_invoked = true
+
+            yield(@first_message, @last_message)
+          end
+        end
+      end
+    end
+  end
+end
```
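The point of `on_started`/`on_finished` is that many jobs built from the same batch may call them concurrently, yet the supplied block fires exactly once. A tiny standalone sketch of that same "run once under a lock" pattern in plain Ruby (not the Pro class itself):

```ruby
lock = Mutex.new
invoked = false
fired = 0

# Runs the given block only the first time it is called, no matter how many threads race
run_once = lambda do |&block|
  lock.synchronize do
    next if invoked

    invoked = true
    block.call
  end
end

threads = 5.times.map { Thread.new { run_once.call { fired += 1 } } }
threads.each(&:join)

fired # => 1
```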
data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb
CHANGED
```diff
@@ -26,7 +26,7 @@ module Karafka
      # management. This layer of the framework knows nothing about Kafka messages consumption.
      class ConsumeNonBlocking < ::Karafka::Processing::Jobs::Consume
        # Releases the blocking lock after it is done with the preparation phase for this job
-       def
+       def before_call
          super
          @non_blocking = true
        end
```