karafka 2.0.0.beta3 → 2.0.0.rc1

Files changed (55)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +18 -15
  4. data/CHANGELOG.md +37 -0
  5. data/CONTRIBUTING.md +0 -5
  6. data/Gemfile.lock +6 -6
  7. data/README.md +2 -10
  8. data/bin/benchmarks +2 -2
  9. data/bin/integrations +10 -3
  10. data/bin/{stress → stress_many} +1 -1
  11. data/bin/stress_one +13 -0
  12. data/bin/wait_for_kafka +20 -0
  13. data/docker-compose.yml +32 -13
  14. data/karafka.gemspec +1 -1
  15. data/lib/karafka/active_job/routing/extensions.rb +1 -1
  16. data/lib/karafka/app.rb +2 -1
  17. data/lib/karafka/base_consumer.rb +59 -46
  18. data/lib/karafka/connection/client.rb +60 -14
  19. data/lib/karafka/connection/listener.rb +37 -11
  20. data/lib/karafka/connection/rebalance_manager.rb +20 -19
  21. data/lib/karafka/contracts/config.rb +18 -4
  22. data/lib/karafka/contracts/server_cli_options.rb +1 -1
  23. data/lib/karafka/errors.rb +3 -0
  24. data/lib/karafka/instrumentation/logger_listener.rb +0 -3
  25. data/lib/karafka/instrumentation/monitor.rb +0 -1
  26. data/lib/karafka/pro/active_job/consumer.rb +2 -8
  27. data/lib/karafka/pro/base_consumer.rb +82 -0
  28. data/lib/karafka/pro/loader.rb +14 -8
  29. data/lib/karafka/pro/processing/coordinator.rb +63 -0
  30. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +1 -1
  31. data/lib/karafka/pro/processing/jobs_builder.rb +3 -2
  32. data/lib/karafka/pro/processing/partitioner.rb +41 -0
  33. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  34. data/lib/karafka/pro/routing/extensions.rb +6 -0
  35. data/lib/karafka/processing/coordinator.rb +88 -0
  36. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  37. data/lib/karafka/processing/executor.rb +16 -9
  38. data/lib/karafka/processing/executors_buffer.rb +46 -15
  39. data/lib/karafka/processing/jobs/base.rb +8 -3
  40. data/lib/karafka/processing/jobs/consume.rb +11 -4
  41. data/lib/karafka/processing/jobs_builder.rb +3 -2
  42. data/lib/karafka/processing/partitioner.rb +22 -0
  43. data/lib/karafka/processing/result.rb +29 -0
  44. data/lib/karafka/processing/scheduler.rb +22 -0
  45. data/lib/karafka/processing/worker.rb +2 -2
  46. data/lib/karafka/routing/consumer_group.rb +1 -1
  47. data/lib/karafka/routing/topic.rb +14 -0
  48. data/lib/karafka/setup/config.rb +20 -10
  49. data/lib/karafka/version.rb +1 -1
  50. data.tar.gz.sig +0 -0
  51. metadata +16 -8
  52. metadata.gz.sig +0 -0
  53. data/lib/karafka/pro/base_consumer_extensions.rb +0 -66
  54. data/lib/karafka/pro/scheduler.rb +0 -54
  55. data/lib/karafka/scheduler.rb +0 -20
data/lib/karafka/connection/client.rb
@@ -36,6 +36,12 @@ module Karafka
         # Marks if we need to offset. If we did not store offsets, we should not commit the offset
         # position as it will crash rdkafka
         @offsetting = false
+        # We need to keep track of what we have paused for resuming
+        # In case we lose a partition, we still need to resume it, otherwise it won't be fetched
+        # again if we get reassigned to it later on. We need to keep them as after revocation we
+        # may no longer be able to fetch them from Kafka. We could rebuild them, but it is easier
+        # to just keep them here and use them when they cannot be obtained
+        @paused_tpls = Hash.new { |h, k| h[k] = {} }
      end

      # Fetches messages within boundaries defined by the settings (time, size, topics, etc).
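
For context on the structure added above: Hash.new { |h, k| h[k] = {} } builds an auto-vivifying nested hash, so a TPL can be cached per topic and partition without initializing the inner hash first. A standalone sketch (the stored value here is a placeholder, not a real TPL object):

    # Auto-vivifying nested hash, as used for @paused_tpls
    paused_tpls = Hash.new { |hash, topic| hash[topic] = {} }

    # The inner hash for 'events' springs into existence on first access
    paused_tpls['events'][0] = :tpl_placeholder

    # Unknown partitions simply return nil, which the resume logic checks for
    paused_tpls['events'][1] # => nil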
@@ -45,12 +51,13 @@ module Karafka
      # @note This method should not be executed from many threads at the same time
      def batch_poll
        time_poll = TimeTrackers::Poll.new(@subscription_group.max_wait_time)
-        time_poll.start

        @buffer.clear
        @rebalance_manager.clear

        loop do
+          time_poll.start
+
          # Don't fetch more messages if we do not have any time left
          break if time_poll.exceeded?
          # Don't fetch more messages if we've fetched max as we've wanted
@@ -69,7 +76,11 @@ module Karafka
          # If partition revocation happens, we need to remove messages from revoked partitions
          # as well as ensure we do not have duplicates due to the offset reset for partitions
          # that we got assigned
-          remove_revoked_and_duplicated_messages if @rebalance_manager.revoked_partitions?
+          # We also break early, so the information about the rebalance is used as soon as possible
+          if @rebalance_manager.changed?
+            remove_revoked_and_duplicated_messages
+            break
+          end

          # Finally once we've (potentially) removed revoked, etc, if no messages were returned
          # we can break.
@@ -144,10 +155,14 @@ module Karafka

        internal_commit_offsets(async: false)

+        # Here we do not use our cached tpls because we should not try to pause something we do
+        # not own anymore
        tpl = topic_partition_list(topic, partition)

        return unless tpl

+        @paused_tpls[topic][partition] = tpl
+
        @kafka.pause(tpl)

        @kafka.seek(pause_msg)
@@ -169,9 +184,13 @@ module Karafka
        # We can skip the performance penalty since resuming should not happen too often
        internal_commit_offsets(async: false)

-        tpl = topic_partition_list(topic, partition)
+        # If we were not able to fetch it, let's try to reuse the one we have (if we have one)
+        tpl = topic_partition_list(topic, partition) || @paused_tpls[topic][partition]

        return unless tpl
+        # If we did not have it, it means we never paused this partition, thus no resume should
+        # happen in the first place
+        return unless @paused_tpls[topic].delete(partition)

        @kafka.resume(tpl)
      ensure
@@ -190,6 +209,7 @@ module Karafka
      # Marks the given message as consumed.
      #
      # @param [Karafka::Messages::Message] message that we want to mark as processed
+      # @return [Boolean] true if successful. False if we no longer own the given partition
      # @note This method won't trigger automatic offset commits, rather relying on the offset
      #   check-pointing trigger that happens with each batch processed
      def mark_as_consumed(message)
@@ -199,8 +219,10 @@ module Karafka
      # Marks a given message as consumed and commits the offsets in a blocking way.
      #
      # @param [Karafka::Messages::Message] message that we want to mark as processed
+      # @return [Boolean] true if successful. False if we no longer own the given partition
      def mark_as_consumed!(message)
-        mark_as_consumed(message)
+        return false unless mark_as_consumed(message)
+
        commit_offsets!
      end

@@ -211,28 +233,42 @@ module Karafka
        @mutex.synchronize do
          @closed = false
          @offsetting = false
+          @paused_tpls.clear
          @kafka = build_consumer
        end
      end

      private

+      # When we cannot store an offset, it means we no longer own the partition
+      #
      # Non thread-safe offset storing method
      # @param message [Karafka::Messages::Message]
+      # @return [Boolean] true if we could store the offset (if we still own the partition)
      def internal_store_offset(message)
        @offsetting = true
        @kafka.store_offset(message)
+        true
+      rescue Rdkafka::RdkafkaError => e
+        return false if e.code == :assignment_lost
+        return false if e.code == :state
+
+        raise e
      end

      # Non thread-safe message committing method
      # @param async [Boolean] should the commit happen async or sync (async by default)
+      # @return [Boolean] true if offset commit worked, false if we've lost the assignment
      def internal_commit_offsets(async: true)
-        return unless @offsetting
+        return true unless @offsetting

        @kafka.commit(nil, async)
        @offsetting = false
+
+        true
      rescue Rdkafka::RdkafkaError => e
-        return if e.code == :no_offset
+        return false if e.code == :assignment_lost
+        return false if e.code == :no_offset

        raise e
      end
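
Since offset storing and committing now report assignment loss through Boolean return values instead of raising, calling code can bail out of a batch cleanly. A hedged consumer-side sketch of reacting to that contract (process is a hypothetical domain method, not part of the gem):

    class EventsConsumer < Karafka::BaseConsumer
      def consume
        messages.each do |message|
          process(message) # hypothetical domain logic

          # mark_as_consumed now returns false once we no longer own the
          # partition, so the rest of the batch can be abandoned immediately
          break unless mark_as_consumed(message)
        end
      end

      private

      # Stand-in for real processing
      def process(message)
        Karafka.logger.info(message.raw_payload)
      end
    end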
@@ -250,7 +286,8 @@ module Karafka

        @kafka.close
        @buffer.clear
-        @rebalance_manager.clear
+        # @note We do not clear the rebalance manager here as we may still have revocation info
+        #   that we want to consider valid prior to running another reconnection
      end
    end

@@ -279,30 +316,39 @@ module Karafka

        time_poll.start

-        @kafka.poll(time_poll.remaining)
+        @kafka.poll(timeout)
      rescue ::Rdkafka::RdkafkaError => e
-        raise if time_poll.attempts > MAX_POLL_RETRIES
-        raise unless time_poll.retryable?
-
+        # We return nil, so we do not restart until running the whole loop
+        # This allows us to run revocation jobs and other things and we will pick up new work
+        # next time after dispatching all the things that are needed
+        #
+        # If we retried here, the client reset would become transparent and we would not have
+        # a chance to take any actions
        case e.code
        when :max_poll_exceeded # -147
          reset
+          return nil
        when :transport # -195
          reset
+          return nil
        when :rebalance_in_progress # -27
          reset
+          return nil
        when :not_coordinator # 16
          reset
+          return nil
        when :network_exception # 13
          reset
+          return nil
        end

-        time_poll.checkpoint
-
+        raise if time_poll.attempts > MAX_POLL_RETRIES
        raise unless time_poll.retryable?

+        time_poll.checkpoint
        time_poll.backoff

+        # On unknown errors we do our best to retry and handle them before raising
        retry
      end

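The rewritten rescue block separates two paths: recoverable rdkafka errors reset the client and return nil, so the outer listener loop can dispatch revocation jobs before polling again, while unknown errors get a bounded backoff-and-retry before being re-raised. A distilled sketch of the first path (poll_once and the reset stub are assumed names for illustration, not the gem's exact internals):

    # Errors after which we rebuild the client instead of retrying the poll
    RECOVERABLE_CODES = %i[
      max_poll_exceeded
      transport
      rebalance_in_progress
      not_coordinator
      network_exception
    ].freeze

    def poll_once(kafka, timeout)
      kafka.poll(timeout)
    rescue Rdkafka::RdkafkaError => e
      raise unless RECOVERABLE_CODES.include?(e.code)

      reset # rebuild the client
      nil   # nil lets the caller loop react to the rebalance before polling again
    end

    def reset
      # Stand-in: the real method rebuilds the underlying rdkafka consumer
    end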
@@ -346,7 +392,7 @@ module Karafka
      # we are no longer responsible in a given process for processing those messages and they
      # should have been picked up by a different process.
      def remove_revoked_and_duplicated_messages
-        @rebalance_manager.revoked_partitions.each do |topic, partitions|
+        @rebalance_manager.lost_partitions.each do |topic, partitions|
          partitions.each do |partition|
            @buffer.delete(topic, partition)
          end
data/lib/karafka/connection/listener.rb
@@ -18,15 +18,18 @@ module Karafka
      # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
      # @return [Karafka::Connection::Listener] listener instance
      def initialize(subscription_group, jobs_queue)
+        proc_config = ::Karafka::App.config.internal.processing
+
        @id = SecureRandom.uuid
        @subscription_group = subscription_group
        @jobs_queue = jobs_queue
-        @jobs_builder = ::Karafka::App.config.internal.jobs_builder
-        @pauses_manager = PausesManager.new
+        @coordinators = Processing::CoordinatorsBuffer.new
        @client = Client.new(@subscription_group)
        @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
+        @jobs_builder = proc_config.jobs_builder
+        @partitioner = proc_config.partitioner_class.new(subscription_group)
        # We reference the scheduler here as it is much faster than fetching it each time
-        @scheduler = ::Karafka::App.config.internal.scheduler
+        @scheduler = proc_config.scheduler
        # We keep one buffer for messages to preserve memory and not allocate extra objects
        # We can do this that way because we always first schedule jobs using messages before we
        # fetch another batch.
@@ -86,6 +89,9 @@ module Karafka
        build_and_schedule_revoke_lost_partitions_jobs

        # We wait only on jobs from our subscription group. Other groups are independent.
+        # This will block on revoked jobs until they are finished. Those are not meant to last
+        # long and should not have any big impact on the system. Doing this in a blocking way
+        # simplifies the overall design and prevents race conditions
        wait

        build_and_schedule_consumption_jobs
@@ -136,7 +142,7 @@ module Karafka

      # Resumes processing of partitions that were paused due to an error.
      def resume_paused_partitions
-        @pauses_manager.resume do |topic, partition|
+        @coordinators.resume do |topic, partition|
          @client.resume(topic, partition)
        end
      end
@@ -152,9 +158,21 @@ module Karafka

        revoked_partitions.each do |topic, partitions|
          partitions.each do |partition|
-            pause_tracker = @pauses_manager.fetch(topic, partition)
-            executor = @executors.fetch(topic, partition, pause_tracker)
-            jobs << @jobs_builder.revoked(executor)
+            @coordinators.revoke(topic, partition)
+
+            # There may be a case where we have lost a partition whose data we have never
+            # processed (if it was assigned and revoked really fast), thus we may not have it
+            # here. In cases like this, we do not run a revocation job
+            @executors.find_all(topic, partition).each do |executor|
+              jobs << @jobs_builder.revoked(executor)
+            end
+
+            # We need to remove all the executors of a given topic partition that we have
+            # lost, so next time we pick up its work, new executors kick in. This may be
+            # needed especially for LRJ, where we could end up with a race condition.
+            # This revocation needs to happen after the jobs are scheduled, otherwise they
+            # would be scheduled with new executors instead of the old ones
+            @executors.revoke(topic, partition)
          end
        end

@@ -191,11 +209,19 @@ module Karafka
        jobs = []

        @messages_buffer.each do |topic, partition, messages|
-          pause_tracker = @pauses_manager.fetch(topic, partition)
+          coordinator = @coordinators.find_or_create(topic, partition)
+
+          # Start work coordination for this topic partition
+          coordinator.start(messages)

-          executor = @executors.fetch(topic, partition, pause_tracker)
+          @partitioner.call(topic, messages) do |group_id, partition_messages|
+            # Count the job we're going to create here
+            coordinator.increment

-          jobs << @jobs_builder.consume(executor, messages)
+            executor = @executors.find_or_create(topic, partition, group_id)
+
+            jobs << @jobs_builder.consume(executor, partition_messages, coordinator)
+          end
        end

        @scheduler.schedule_consumption(@jobs_queue, jobs)
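
The partitioner contract used above is small: given a topic and its batch, it yields one or more (group_id, messages) pairs, and each pair becomes a separate consume job sharing the same coordinator. A minimal conforming implementation, which keeps the pre-rc1 single-job behaviour (a simplified illustration, not the Pro virtual-partitions partitioner):

    class PassThroughPartitioner
      # @param subscription_group [Karafka::Routing::SubscriptionGroup]
      def initialize(subscription_group)
        @subscription_group = subscription_group
      end

      # Yields the whole batch as a single group with id 0
      def call(_topic, messages)
        yield(0, messages)
      end
    end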
@@ -231,7 +257,7 @@ module Karafka
        @jobs_queue.wait(@subscription_group.id)
        @jobs_queue.clear(@subscription_group.id)
        @client.reset
-        @pauses_manager = PausesManager.new
+        @coordinators.reset
        @executors = Processing::ExecutorsBuffer.new(@client, @subscription_group)
      end
    end
data/lib/karafka/connection/rebalance_manager.rb
@@ -18,13 +18,15 @@ module Karafka
      # Empty array for internal usage not to create new objects
      EMPTY_ARRAY = [].freeze

+      attr_reader :assigned_partitions, :revoked_partitions
+
      private_constant :EMPTY_ARRAY

      # @return [RebalanceManager]
      def initialize
        @assigned_partitions = {}
        @revoked_partitions = {}
-        @lost_partitions = {}
+        @changed = false
      end

      # Resets the rebalance manager state
@@ -33,26 +35,12 @@ module Karafka
      def clear
        @assigned_partitions.clear
        @revoked_partitions.clear
-        @lost_partitions.clear
-      end
-
-      # @return [Hash<String, Array<Integer>>] hash where the keys are the names of topics for
-      #   which we've lost partitions and array with ids of the partitions as the value
-      # @note We do not consider as lost topics and partitions that got revoked and assigned
-      def revoked_partitions
-        return @revoked_partitions if @revoked_partitions.empty?
-        return @lost_partitions unless @lost_partitions.empty?
-
-        @revoked_partitions.each do |topic, partitions|
-          @lost_partitions[topic] = partitions - @assigned_partitions.fetch(topic, EMPTY_ARRAY)
-        end
-
-        @lost_partitions
+        @changed = false
      end

-      # @return [Boolean] true if any partitions were revoked
-      def revoked_partitions?
-        !revoked_partitions.empty?
+      # @return [Boolean] indicates a state change in the partitions assignment
+      def changed?
+        @changed
      end

      # Callback that kicks in inside of rdkafka, when new partitions are assigned.
@@ -62,6 +50,7 @@ module Karafka
      # @param partitions [Rdkafka::Consumer::TopicPartitionList]
      def on_partitions_assigned(_, partitions)
        @assigned_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
+        @changed = true
      end

      # Callback that kicks in inside of rdkafka, when partitions are revoked.
@@ -71,6 +60,18 @@ module Karafka
      # @param partitions [Rdkafka::Consumer::TopicPartitionList]
      def on_partitions_revoked(_, partitions)
        @revoked_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
+        @changed = true
+      end
+
+      # We consider as lost only partitions that were taken away and not re-assigned back to us
+      def lost_partitions
+        lost_partitions = {}
+
+        revoked_partitions.each do |topic, partitions|
+          lost_partitions[topic] = partitions - assigned_partitions.fetch(topic, EMPTY_ARRAY)
+        end
+
+        lost_partitions
      end
    end
  end
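
In other words, lost_partitions is the revoked set minus whatever was assigned back to this process within the same rebalance, so a partition that bounced straight back is not treated as lost. A worked example of the computation:

    revoked  = { 'events' => [0, 1, 2] }
    assigned = { 'events' => [2] }

    lost = revoked.to_h do |topic, partitions|
      [topic, partitions - assigned.fetch(topic, [])]
    end

    lost # => { 'events' => [0, 1] }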
data/lib/karafka/contracts/config.rb
@@ -30,12 +30,26 @@ module Karafka

      # We validate internals just to be sure that they are present and working
      required(:internal).schema do
-        required(:routing_builder)
-        required(:subscription_groups_builder)
-        required(:jobs_builder)
        required(:status)
        required(:process)
-        required(:scheduler)
+
+        required(:routing).schema do
+          required(:builder)
+          required(:subscription_groups_builder)
+        end
+
+        required(:processing).schema do
+          required(:jobs_builder)
+          required(:scheduler)
+          required(:coordinator_class)
+          required(:partitioner_class)
+        end
+
+        required(:active_job).schema do
+          required(:dispatcher)
+          required(:job_options_contract)
+          required(:consumer_class)
+        end
      end
    end

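The contract mirrors the reorganization of the internal settings namespace in this release: the flat routing_builder, jobs_builder and scheduler keys moved into routing, processing and active_job sub-scopes. A sketch of how the nested settings are addressed after the change (paths taken from the contract above and the loader diff below):

    internal = Karafka::App.config.internal

    internal.routing.builder              # previously internal.routing_builder
    internal.processing.scheduler         # previously internal.scheduler
    internal.processing.partitioner_class # new in rc1
    internal.active_job.consumer_class    # previously internal.active_job.consumer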
data/lib/karafka/contracts/server_cli_options.rb
@@ -12,7 +12,7 @@ module Karafka
      # If there were no consumer_groups declared in the server cli, it means that we will
      # run all of them and no need to validate them here at all
      if !value.nil? &&
-         !(value - Karafka::App.config.internal.routing_builder.map(&:name)).empty?
+         !(value - Karafka::App.config.internal.routing.builder.map(&:name)).empty?
        key(:consumer_groups).failure(:consumer_groups_inclusion)
      end
    end
data/lib/karafka/errors.rb
@@ -47,5 +47,8 @@ module Karafka
    # Used to instrument this error into the error notifications
    # We do not raise it so we won't crash deployed systems
    ExpiredLicenseTokenError = Class.new(BaseError)
+
+    # This should never happen. Please open an issue if it does.
+    InvalidCoordinatorState = Class.new(BaseError)
  end
end
data/lib/karafka/instrumentation/logger_listener.rb
@@ -98,9 +98,6 @@ module Karafka
      details = (error.backtrace || []).join("\n")

      case type
-      when 'consumer.prepared.error'
-        error "Consumer prepared error: #{error}"
-        error details
      when 'consumer.consume.error'
        error "Consumer consuming error: #{error}"
        error details
data/lib/karafka/instrumentation/monitor.rb
@@ -22,7 +22,6 @@ module Karafka
      app.stopping
      app.stopped

-      consumer.prepared
      consumer.consumed
      consumer.revoked
      consumer.shutdown
data/lib/karafka/pro/active_job/consumer.rb
@@ -20,26 +20,20 @@ module Karafka
      #
      # It contains slightly better revocation guarantees than the regular blocking consumer as
      # it can stop processing a batch of jobs in the middle after the revocation.
-      class Consumer < Karafka::ActiveJob::Consumer
+      class Consumer < Karafka::Pro::BaseConsumer
        # Runs ActiveJob jobs processing and handles lrj if needed
        def consume
          messages.each do |message|
            # If for any reason we've lost this partition, it is not worth iterating over new
            # messages as they are no longer ours
-            return if revoked?
+            break if revoked?
            break if Karafka::App.stopping?

            ::ActiveJob::Base.execute(
              ::ActiveSupport::JSON.decode(message.raw_payload)
            )

-            # We check it twice as the job may be long running
-            return if revoked?
-
            mark_as_consumed(message)
-
-            # Do not process more if we are shutting down
-            break if Karafka::App.stopping?
          end
        end
      end
data/lib/karafka/pro/base_consumer.rb
@@ -0,0 +1,82 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component.
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires a commercial license agreement.
+#
+# Karafka also has a commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    # Karafka PRO consumer.
+    #
+    # If you use PRO, all your consumers should inherit (indirectly) from it.
+    #
+    # @note In case of using lrj, manual pausing may not be the best idea as resume needs to
+    #   happen after each batch is processed.
+    class BaseConsumer < Karafka::BaseConsumer
+      # Pause for at most 31 years
+      MAX_PAUSE_TIME = 1_000_000_000_000
+
+      private_constant :MAX_PAUSE_TIME
+
+      # Pauses processing of a given partition until we're done with the processing
+      # This ensures that we can easily poll without reaching the `max.poll.interval`
+      def on_before_consume
+        return unless topic.long_running_job?
+
+        # This ensures that when running LRJ with VP, things operate as expected
+        coordinator.on_started do |first_group_message|
+          # Pause at the first message in a batch. That way, in case of a crash, we will not
+          # lose any messages
+          pause(first_group_message.offset, MAX_PAUSE_TIME)
+        end
+      end
+
+      # Runs extra logic after consumption that is related to handling long running jobs
+      # @note This overwrites the `#on_after_consume` from the base consumer
+      def on_after_consume
+        coordinator.on_finished do |first_group_message, last_group_message|
+          on_after_consume_regular(first_group_message, last_group_message)
+        end
+      end
+
+      private
+
+      # Handles the post-consumption flow depending on topic settings
+      #
+      # @param first_message [Karafka::Messages::Message]
+      # @param last_message [Karafka::Messages::Message]
+      def on_after_consume_regular(first_message, last_message)
+        if coordinator.success?
+          coordinator.pause_tracker.reset
+
+          # We use the non-blocking one here. If someone needs the blocking one, they can
+          # implement it with manual offset management
+          # Mark as consumed only if manual offset management is not on
+          mark_as_consumed(last_message) unless topic.manual_offset_management? || revoked?
+
+          # If this is not a long running job there is nothing for us to do here
+          return unless topic.long_running_job?
+
+          # Once processing is done, we move to the new offset based on commits
+          # Here, in case manual offset management is off, we have the new proper offset of a
+          # first message from another batch in `@seek_offset`. If manual offset management
+          # is on, we move to the place where the user indicated it was finished. This can
+          # create an interesting (yet valid) corner case, where with manual offset management
+          # on and no marking as consumed, we end up with an infinite loop processing the same
+          # messages over and over again
+          seek(@seek_offset || first_message.offset)
+
+          resume
+        else
+          # If processing failed, we need to pause
+          pause(@seek_offset || first_message.offset)
+        end
+      end
+    end
+  end
+end
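
To see the flow end to end: for a long running job topic, on_before_consume pauses the partition at the batch's first offset before any work starts, and on_after_consume seeks and resumes once the whole batch is done, so max.poll.interval.ms cannot be exceeded mid-batch. A hedged usage sketch (the long_running_job routing flag comes from the Pro routing extensions in this release; generate_report is hypothetical):

    class ReportsConsumer < Karafka::Pro::BaseConsumer
      def consume
        messages.each do |message|
          # Slow work is safe here: the partition is paused, so polling
          # continues without exceeding max.poll.interval.ms
          generate_report(message)
        end
      end

      private

      def generate_report(message)
        sleep(60) # stand-in for long processing
        Karafka.logger.info("Done with #{message.offset}")
      end
    end

    class KarafkaApp < Karafka::App
      routes.draw do
        topic :reports do
          consumer ReportsConsumer
          long_running_job true
        end
      end
    end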
data/lib/karafka/pro/loader.rb
@@ -15,11 +15,13 @@ module Karafka
    class Loader
      # All the pro components that need to be loaded
      COMPONENTS = %w[
+        base_consumer
        performance_tracker
-        scheduler
-        base_consumer_extensions
+        processing/scheduler
        processing/jobs/consume_non_blocking
        processing/jobs_builder
+        processing/coordinator
+        processing/partitioner
        routing/extensions
        active_job/consumer
        active_job/dispatcher
@@ -35,14 +37,18 @@ module Karafka
      def setup(config)
        COMPONENTS.each { |component| require_relative(component) }

-        config.internal.scheduler = Scheduler.new
-        config.internal.jobs_builder = Processing::JobsBuilder.new
-        config.internal.active_job.consumer = ActiveJob::Consumer
-        config.internal.active_job.dispatcher = ActiveJob::Dispatcher.new
-        config.internal.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
+        icfg = config.internal
+
+        icfg.processing.coordinator_class = Processing::Coordinator
+        icfg.processing.partitioner_class = Processing::Partitioner
+        icfg.processing.scheduler = Processing::Scheduler.new
+        icfg.processing.jobs_builder = Processing::JobsBuilder.new
+
+        icfg.active_job.consumer_class = ActiveJob::Consumer
+        icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
+        icfg.active_job.job_options_contract = ActiveJob::JobOptionsContract.new

        ::Karafka::Routing::Topic.include(Routing::Extensions)
-        ::Karafka::BaseConsumer.prepend(BaseConsumerExtensions)

        config.monitor.subscribe(PerformanceTracker.instance)
      end
data/lib/karafka/pro/processing/coordinator.rb
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Pro
+    module Processing
+      # Pro coordinator that provides extra orchestration methods useful for parallel processing
+      # within the same partition
+      class Coordinator < ::Karafka::Processing::Coordinator
+        # @param args [Object] anything the base coordinator accepts
+        def initialize(*args)
+          super
+          @on_started_invoked = false
+          @on_finished_invoked = false
+          @flow_lock = Mutex.new
+        end
+
+        # Starts the coordination process
+        # @param messages [Array<Karafka::Messages::Message>] messages whose processing we are
+        #   going to coordinate.
+        def start(messages)
+          super
+
+          @mutex.synchronize do
+            @on_started_invoked = false
+            @on_finished_invoked = false
+            @first_message = messages.first
+            @last_message = messages.last
+          end
+        end
+
+        # @return [Boolean] is the coordinated work finished or not
+        def finished?
+          @running_jobs.zero?
+        end
+
+        # Runs the given code only once across all the coordinated jobs, upon starting the
+        # first of them
+        def on_started
+          @flow_lock.synchronize do
+            return if @on_started_invoked
+
+            @on_started_invoked = true
+
+            yield(@first_message, @last_message)
+          end
+        end
+
+        # Runs once when all the work that is supposed to be coordinated is finished
+        # It runs once across all the coordinated jobs and should be used for any type of
+        # post-execution processing
+        def on_finished
+          @flow_lock.synchronize do
+            return unless finished?
+            return if @on_finished_invoked
+
+            @on_finished_invoked = true
+
+            yield(@first_message, @last_message)
+          end
+        end
+      end
+    end
+  end
+end
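
A sketch of the intended call pattern: all jobs created for one topic partition batch share a single coordinator, on_started fires only for the first job that starts, and on_finished only after the last running job finishes. Illustrative driver code (it assumes a coordinator built elsewhere with whatever the base class requires, and the base class's increment/decrement job counters):

    coordinator.increment # job A scheduled
    coordinator.increment # job B scheduled

    # Runs the block; further calls are no-ops until the next #start
    coordinator.on_started { |first, _last| puts "pausing at #{first.offset}" }

    coordinator.decrement # job A done
    coordinator.on_finished { puts 'never runs, job B still going' }

    coordinator.decrement # job B done
    coordinator.on_finished { |_first, last| puts "seeking past #{last.offset}" } # runs once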
data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb
@@ -26,7 +26,7 @@ module Karafka
      # management. This layer of the framework knows nothing about Kafka messages consumption.
      class ConsumeNonBlocking < ::Karafka::Processing::Jobs::Consume
        # Releases the blocking lock after it is done with the preparation phase for this job
-        def prepare
+        def before_call
          super
          @non_blocking = true
        end
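
The prepare to before_call rename follows the base job API change in this release (processing/jobs/base.rb in the file list above). The non-blocking variant only flips @non_blocking, which lets the worker release the blocking lock while the job runs. A simplified illustration of the flag pattern (stand-in classes, not the gem's full job API):

    class BaseJob
      def initialize
        @non_blocking = false
      end

      # Invoked by the worker right before the job's work starts
      def before_call; end

      # Blocking jobs hold their queue slot for their whole runtime
      def non_blocking?
        @non_blocking
      end
    end

    class NonBlockingJob < BaseJob
      def before_call
        super
        @non_blocking = true
      end
    end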