karafka 2.0.0.beta3 → 2.0.0.rc1

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (55)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +18 -15
  4. data/CHANGELOG.md +37 -0
  5. data/CONTRIBUTING.md +0 -5
  6. data/Gemfile.lock +6 -6
  7. data/README.md +2 -10
  8. data/bin/benchmarks +2 -2
  9. data/bin/integrations +10 -3
  10. data/bin/{stress → stress_many} +1 -1
  11. data/bin/stress_one +13 -0
  12. data/bin/wait_for_kafka +20 -0
  13. data/docker-compose.yml +32 -13
  14. data/karafka.gemspec +1 -1
  15. data/lib/karafka/active_job/routing/extensions.rb +1 -1
  16. data/lib/karafka/app.rb +2 -1
  17. data/lib/karafka/base_consumer.rb +59 -46
  18. data/lib/karafka/connection/client.rb +60 -14
  19. data/lib/karafka/connection/listener.rb +37 -11
  20. data/lib/karafka/connection/rebalance_manager.rb +20 -19
  21. data/lib/karafka/contracts/config.rb +18 -4
  22. data/lib/karafka/contracts/server_cli_options.rb +1 -1
  23. data/lib/karafka/errors.rb +3 -0
  24. data/lib/karafka/instrumentation/logger_listener.rb +0 -3
  25. data/lib/karafka/instrumentation/monitor.rb +0 -1
  26. data/lib/karafka/pro/active_job/consumer.rb +2 -8
  27. data/lib/karafka/pro/base_consumer.rb +82 -0
  28. data/lib/karafka/pro/loader.rb +14 -8
  29. data/lib/karafka/pro/processing/coordinator.rb +63 -0
  30. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +1 -1
  31. data/lib/karafka/pro/processing/jobs_builder.rb +3 -2
  32. data/lib/karafka/pro/processing/partitioner.rb +41 -0
  33. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  34. data/lib/karafka/pro/routing/extensions.rb +6 -0
  35. data/lib/karafka/processing/coordinator.rb +88 -0
  36. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  37. data/lib/karafka/processing/executor.rb +16 -9
  38. data/lib/karafka/processing/executors_buffer.rb +46 -15
  39. data/lib/karafka/processing/jobs/base.rb +8 -3
  40. data/lib/karafka/processing/jobs/consume.rb +11 -4
  41. data/lib/karafka/processing/jobs_builder.rb +3 -2
  42. data/lib/karafka/processing/partitioner.rb +22 -0
  43. data/lib/karafka/processing/result.rb +29 -0
  44. data/lib/karafka/processing/scheduler.rb +22 -0
  45. data/lib/karafka/processing/worker.rb +2 -2
  46. data/lib/karafka/routing/consumer_group.rb +1 -1
  47. data/lib/karafka/routing/topic.rb +14 -0
  48. data/lib/karafka/setup/config.rb +20 -10
  49. data/lib/karafka/version.rb +1 -1
  50. data.tar.gz.sig +0 -0
  51. metadata +16 -8
  52. metadata.gz.sig +0 -0
  53. data/lib/karafka/pro/base_consumer_extensions.rb +0 -66
  54. data/lib/karafka/pro/scheduler.rb +0 -54
  55. data/lib/karafka/scheduler.rb +0 -20
data/lib/karafka/connection/client.rb

@@ -36,6 +36,12 @@ module Karafka
  # Marks if we need to offset. If we did not store offsets, we should not commit the offset
  # position as it will crash rdkafka
  @offsetting = false
+ # We need to keep track of what we have paused for resuming
+ # In case we loose partition, we still need to resume it, otherwise it won't be fetched
+ # again if we get reassigned to it later on. We need to keep them as after revocation we
+ # no longer may be able to fetch them from Kafka. We could build them but it is easier
+ # to just keep them here and use if needed when cannot be obtained
+ @paused_tpls = Hash.new { |h, k| h[k] = {} }
  end

  # Fetches messages within boundaries defined by the settings (time, size, topics, etc).
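Side note on the default block used in the new `@paused_tpls` cache: `Hash.new { |h, k| h[k] = {} }` auto-creates a nested hash per topic on first access, so partition entries can be written without first checking whether the topic key exists. A minimal standalone illustration (not part of the gem):

    cache = Hash.new { |h, k| h[k] = {} }

    cache['events'][0] = :tpl_for_partition_0
    cache['events']      # => { 0 => :tpl_for_partition_0 }
    cache['other_topic'] # => {} (created on demand)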
@@ -45,12 +51,13 @@ module Karafka
  # @note This method should not be executed from many threads at the same time
  def batch_poll
  time_poll = TimeTrackers::Poll.new(@subscription_group.max_wait_time)
- time_poll.start

  @buffer.clear
  @rebalance_manager.clear

  loop do
+ time_poll.start
+
  # Don't fetch more messages if we do not have any time left
  break if time_poll.exceeded?
  # Don't fetch more messages if we've fetched max as we've wanted
@@ -69,7 +76,11 @@ module Karafka
  # If partition revocation happens, we need to remove messages from revoked partitions
  # as well as ensure we do not have duplicated due to the offset reset for partitions
  # that we got assigned
- remove_revoked_and_duplicated_messages if @rebalance_manager.revoked_partitions?
+ # We also do early break, so the information about rebalance is used as soon as possible
+ if @rebalance_manager.changed?
+ remove_revoked_and_duplicated_messages
+ break
+ end

  # Finally once we've (potentially) removed revoked, etc, if no messages were returned
  # we can break.
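Taken together, these two changes restart the per-iteration time budget inside the loop and abort the batch as soon as the rebalance manager reports any assignment change. A simplified, illustrative shape of that loop (`poll_one`, `drop_messages_from_lost_partitions` and `buffer` are placeholders, not the gem's internals; only `start`, `exceeded?`, `remaining` and `changed?` come from the diff above):

    # Sketch only: poll until the time budget is exhausted, the poll returns nothing,
    # or the partition assignment changes
    loop do
      time_poll.start

      break if time_poll.exceeded?

      message = poll_one(time_poll.remaining)

      if rebalance_manager.changed?
        drop_messages_from_lost_partitions
        break
      end

      break unless message

      buffer << message
    end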
@@ -144,10 +155,14 @@ module Karafka

  internal_commit_offsets(async: false)

+ # Here we do not use our cached tpls because we should not try to pause something we do
+ # not own anymore.
  tpl = topic_partition_list(topic, partition)

  return unless tpl

+ @paused_tpls[topic][partition] = tpl
+
  @kafka.pause(tpl)

  @kafka.seek(pause_msg)
@@ -169,9 +184,13 @@ module Karafka
  # We can skip performance penalty since resuming should not happen too often
  internal_commit_offsets(async: false)

- tpl = topic_partition_list(topic, partition)
+ # If we were not able, let's try to reuse the one we have (if we have)
+ tpl = topic_partition_list(topic, partition) || @paused_tpls[topic][partition]

  return unless tpl
+ # If we did not have it, it means we never paused this partition, thus no resume should
+ # happen in the first place
+ return unless @paused_tpls[topic].delete(partition)

  @kafka.resume(tpl)
  ensure
@@ -190,6 +209,7 @@ module Karafka
  # Marks given message as consumed.
  #
  # @param [Karafka::Messages::Message] message that we want to mark as processed
+ # @return [Boolean] true if successful. False if we no longer own given partition
  # @note This method won't trigger automatic offsets commits, rather relying on the offset
  # check-pointing trigger that happens with each batch processed
  def mark_as_consumed(message)
@@ -199,8 +219,10 @@ module Karafka
  # Marks a given message as consumed and commits the offsets in a blocking way.
  #
  # @param [Karafka::Messages::Message] message that we want to mark as processed
+ # @return [Boolean] true if successful. False if we no longer own given partition
  def mark_as_consumed!(message)
- mark_as_consumed(message)
+ return false unless mark_as_consumed(message)
+
  commit_offsets!
  end

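Usage-wise, the boolean return lets calling code treat a lost assignment as a signal rather than an error. A hedged sketch from the consumer side (assuming the consumer-level marking methods, which also changed in this release but are not shown in these hunks, propagate the client's boolean; class and service names are hypothetical):

    class OrdersConsumer < Karafka::BaseConsumer
      def consume
        messages.each do |message|
          Orders::Importer.call(message.payload)

          # false means the partition assignment was lost; the remaining messages
          # will be (re)processed by whichever process now owns the partition
          break unless mark_as_consumed(message)
        end
      end
    end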
@@ -211,28 +233,42 @@ module Karafka
  @mutex.synchronize do
  @closed = false
  @offsetting = false
+ @paused_tpls.clear
  @kafka = build_consumer
  end
  end

  private

+ # When we cannot store an offset, it means we no longer own the partition
+ #
  # Non thread-safe offset storing method
  # @param message [Karafka::Messages::Message]
+ # @return [Boolean] true if we could store the offset (if we still own the partition)
  def internal_store_offset(message)
  @offsetting = true
  @kafka.store_offset(message)
+ true
+ rescue Rdkafka::RdkafkaError => e
+ return false if e.code == :assignment_lost
+ return false if e.code == :state
+
+ raise e
  end

  # Non thread-safe message committing method
  # @param async [Boolean] should the commit happen async or sync (async by default)
+ # @return [Boolean] true if offset commit worked, false if we've lost the assignment
  def internal_commit_offsets(async: true)
- return unless @offsetting
+ return true unless @offsetting

  @kafka.commit(nil, async)
  @offsetting = false
+
+ true
  rescue Rdkafka::RdkafkaError => e
- return if e.code == :no_offset
+ return false if e.code == :assignment_lost
+ return false if e.code == :no_offset

  raise e
  end
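The pattern here is to treat "we no longer own this partition" as an expected, non-exceptional outcome of rdkafka calls. A standalone sketch of the same idea (the wrapper method is illustrative; `Rdkafka::RdkafkaError#code` returning a symbol is the rdkafka-ruby API already used in the diff):

    # Returns true when the offset was stored, false when the assignment is gone,
    # and re-raises anything else
    def store_offset_safely(rdkafka_consumer, message)
      rdkafka_consumer.store_offset(message)
      true
    rescue Rdkafka::RdkafkaError => e
      return false if %i[assignment_lost state].include?(e.code)

      raise
    end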
@@ -250,7 +286,8 @@ module Karafka

  @kafka.close
  @buffer.clear
- @rebalance_manager.clear
+ # @note We do not clear rebalance manager here as we may still have revocation info here
+ # that we want to consider valid prior to running another reconnection
  end
  end

@@ -279,30 +316,39 @@ module Karafka

  time_poll.start

- @kafka.poll(time_poll.remaining)
+ @kafka.poll(timeout)
  rescue ::Rdkafka::RdkafkaError => e
- raise if time_poll.attempts > MAX_POLL_RETRIES
- raise unless time_poll.retryable?
-
+ # We return nil, so we do not restart until running the whole loop
+ # This allows us to run revocation jobs and other things and we will pick up new work
+ # next time after dispatching all the things that are needed
+ #
+ # If we would retry here, the client reset would become transparent and we would not have
+ # a chance to take any actions
  case e.code
  when :max_poll_exceeded # -147
  reset
+ return nil
  when :transport # -195
  reset
+ return nil
  when :rebalance_in_progress # -27
  reset
+ return nil
  when :not_coordinator # 16
  reset
+ return nil
  when :network_exception # 13
  reset
+ return nil
  end

- time_poll.checkpoint
-
+ raise if time_poll.attempts > MAX_POLL_RETRIES
  raise unless time_poll.retryable?

+ time_poll.checkpoint
  time_poll.backoff

+ # On unknown errors we do our best to retry and handle them before raising
  retry
  end

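In short, the error handling now has two branches: known assignment-related codes reset the client and return nil immediately so the listener loop can schedule revocation work, while unknown errors are retried with a backoff a limited number of times and then re-raised. A compressed, illustrative version of that policy (the wrapper signature and `RESET_CODES` constant are not part of the gem; the time tracker calls come from the diff above):

    RESET_CODES = %i[
      max_poll_exceeded transport rebalance_in_progress not_coordinator network_exception
    ].freeze

    def poll_with_recovery(client, time_poll, timeout, max_retries)
      client.poll(timeout)
    rescue Rdkafka::RdkafkaError => e
      # Assignment-related problems: rebuild the client and bail out with nil so the
      # caller's loop can dispatch revocation jobs before polling again
      if RESET_CODES.include?(e.code)
        client.reset
        return nil
      end

      # Anything else: retry with a backoff a limited number of times, then re-raise
      raise if time_poll.attempts > max_retries
      raise unless time_poll.retryable?

      time_poll.checkpoint
      time_poll.backoff
      retry
    end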
@@ -346,7 +392,7 @@ module Karafka
  # we are no longer responsible in a given process for processing those messages and they
  # should have been picked up by a different process.
  def remove_revoked_and_duplicated_messages
- @rebalance_manager.revoked_partitions.each do |topic, partitions|
+ @rebalance_manager.lost_partitions.each do |topic, partitions|
  partitions.each do |partition|
  @buffer.delete(topic, partition)
  end
data/lib/karafka/connection/listener.rb

@@ -18,15 +18,18 @@ module Karafka
  # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
  # @return [Karafka::Connection::Listener] listener instance
  def initialize(subscription_group, jobs_queue)
+ proc_config = ::Karafka::App.config.internal.processing
+
  @id = SecureRandom.uuid
  @subscription_group = subscription_group
  @jobs_queue = jobs_queue
- @jobs_builder = ::Karafka::App.config.internal.jobs_builder
- @pauses_manager = PausesManager.new
+ @coordinators = Processing::CoordinatorsBuffer.new
  @client = Client.new(@subscription_group)
  @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
+ @jobs_builder = proc_config.jobs_builder
+ @partitioner = proc_config.partitioner_class.new(subscription_group)
  # We reference scheduler here as it is much faster than fetching this each time
- @scheduler = ::Karafka::App.config.internal.scheduler
+ @scheduler = proc_config.scheduler
  # We keep one buffer for messages to preserve memory and not allocate extra objects
  # We can do this that way because we always first schedule jobs using messages before we
  # fetch another batch.
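The listener now pulls its processing collaborators from the new `internal.processing` namespace instead of flat `internal.*` keys. For orientation, the keys it relies on (matching the config contract later in this diff; the one-line descriptions are editorial summaries, and defaults live in data/lib/karafka/setup/config.rb with Pro overrides applied by Karafka::Pro::Loader):

    proc_config = ::Karafka::App.config.internal.processing

    proc_config.jobs_builder      # builds consume/revoke jobs
    proc_config.scheduler         # orders jobs in the queue
    proc_config.partitioner_class # splits a partition batch into parallelizable groups
    proc_config.coordinator_class # tracks per topic-partition processing state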
@@ -86,6 +89,9 @@ module Karafka
  build_and_schedule_revoke_lost_partitions_jobs

  # We wait only on jobs from our subscription group. Other groups are independent.
+ # This will block on revoked jobs until they are finished. Those are not meant to last
+ # long and should not have any bigger impact on the system. Doing this in a blocking way
+ # simplifies the overall design and prevents from race conditions
  wait

  build_and_schedule_consumption_jobs
@@ -136,7 +142,7 @@ module Karafka

  # Resumes processing of partitions that were paused due to an error.
  def resume_paused_partitions
- @pauses_manager.resume do |topic, partition|
+ @coordinators.resume do |topic, partition|
  @client.resume(topic, partition)
  end
  end
@@ -152,9 +158,21 @@ module Karafka

  revoked_partitions.each do |topic, partitions|
  partitions.each do |partition|
- pause_tracker = @pauses_manager.fetch(topic, partition)
- executor = @executors.fetch(topic, partition, pause_tracker)
- jobs << @jobs_builder.revoked(executor)
+ @coordinators.revoke(topic, partition)
+
+ # There may be a case where we have lost partition of which data we have never
+ # processed (if it was assigned and revoked really fast), thus we may not have it
+ # here. In cases like this, we do not run a revocation job
+ @executors.find_all(topic, partition).each do |executor|
+ jobs << @jobs_builder.revoked(executor)
+ end
+
+ # We need to remove all the executors of a given topic partition that we have lost, so
+ # next time we pick up it's work, new executors kick in. This may be needed especially
+ # for LRJ where we could end up with a race condition
+ # This revocation needs to happen after the jobs are scheduled, otherwise they would
+ # be scheduled with new executors instead of old
+ @executors.revoke(topic, partition)
  end
  end

@@ -191,11 +209,19 @@ module Karafka
  jobs = []

  @messages_buffer.each do |topic, partition, messages|
- pause_tracker = @pauses_manager.fetch(topic, partition)
+ coordinator = @coordinators.find_or_create(topic, partition)
+
+ # Start work coordination for this topic partition
+ coordinator.start(messages)

- executor = @executors.fetch(topic, partition, pause_tracker)
+ @partitioner.call(topic, messages) do |group_id, partition_messages|
+ # Count the job we're going to create here
+ coordinator.increment

- jobs << @jobs_builder.consume(executor, messages)
+ executor = @executors.find_or_create(topic, partition, group_id)
+
+ jobs << @jobs_builder.consume(executor, partition_messages, coordinator)
+ end
  end

  @scheduler.schedule_consumption(@jobs_queue, jobs)
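The `@partitioner.call(topic, messages)` contract is the key extension point here: it yields one or more (group_id, messages) pairs, and each pair becomes its own consume job sharing the same coordinator. A minimal partitioner compatible with this contract could simply keep everything in a single group, which preserves the previous one-job-per-partition behaviour (a sketch, not necessarily the gem's actual default class):

    class SingleGroupPartitioner
      def initialize(subscription_group)
        @subscription_group = subscription_group
      end

      # Yields exactly once, so exactly one consume job is built per topic partition
      def call(_topic, messages)
        yield(0, messages)
      end
    end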
@@ -231,7 +257,7 @@ module Karafka
  @jobs_queue.wait(@subscription_group.id)
  @jobs_queue.clear(@subscription_group.id)
  @client.reset
- @pauses_manager = PausesManager.new
+ @coordinators.reset
  @executors = Processing::ExecutorsBuffer.new(@client, @subscription_group)
  end
  end
data/lib/karafka/connection/rebalance_manager.rb

@@ -18,13 +18,15 @@ module Karafka
  # Empty array for internal usage not to create new objects
  EMPTY_ARRAY = [].freeze

+ attr_reader :assigned_partitions, :revoked_partitions
+
  private_constant :EMPTY_ARRAY

  # @return [RebalanceManager]
  def initialize
  @assigned_partitions = {}
  @revoked_partitions = {}
- @lost_partitions = {}
+ @changed = false
  end

  # Resets the rebalance manager state
@@ -33,26 +35,12 @@ module Karafka
  def clear
  @assigned_partitions.clear
  @revoked_partitions.clear
- @lost_partitions.clear
- end
-
- # @return [Hash<String, Array<Integer>>] hash where the keys are the names of topics for
- # which we've lost partitions and array with ids of the partitions as the value
- # @note We do not consider as lost topics and partitions that got revoked and assigned
- def revoked_partitions
- return @revoked_partitions if @revoked_partitions.empty?
- return @lost_partitions unless @lost_partitions.empty?
-
- @revoked_partitions.each do |topic, partitions|
- @lost_partitions[topic] = partitions - @assigned_partitions.fetch(topic, EMPTY_ARRAY)
- end
-
- @lost_partitions
+ @changed = false
  end

- # @return [Boolean] true if any partitions were revoked
- def revoked_partitions?
- !revoked_partitions.empty?
+ # @return [Boolean] indicates a state change in the partitions assignment
+ def changed?
+ @changed
  end

  # Callback that kicks in inside of rdkafka, when new partitions are assigned.
@@ -62,6 +50,7 @@ module Karafka
  # @param partitions [Rdkafka::Consumer::TopicPartitionList]
  def on_partitions_assigned(_, partitions)
  @assigned_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
+ @changed = true
  end

  # Callback that kicks in inside of rdkafka, when partitions are revoked.
@@ -71,6 +60,18 @@ module Karafka
  # @param partitions [Rdkafka::Consumer::TopicPartitionList]
  def on_partitions_revoked(_, partitions)
  @revoked_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
+ @changed = true
+ end
+
+ # We consider as lost only partitions that were taken away and not re-assigned back to us
+ def lost_partitions
+ lost_partitions = {}
+
+ revoked_partitions.each do |topic, partitions|
+ lost_partitions[topic] = partitions - assigned_partitions.fetch(topic, EMPTY_ARRAY)
+ end
+
+ lost_partitions
  end
  end
  end
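A quick worked example of the new `lost_partitions` semantics: a partition that was revoked but then re-assigned to the same process in the same rebalance is not reported as lost.

    revoked  = { 'events' => [0, 1, 2] }
    assigned = { 'events' => [1] }

    lost = {}
    revoked.each do |topic, partitions|
      lost[topic] = partitions - assigned.fetch(topic, [])
    end

    lost # => { "events" => [0, 2] }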
data/lib/karafka/contracts/config.rb

@@ -30,12 +30,26 @@ module Karafka

  # We validate internals just to be sure, that they are present and working
  required(:internal).schema do
- required(:routing_builder)
- required(:subscription_groups_builder)
- required(:jobs_builder)
  required(:status)
  required(:process)
- required(:scheduler)
+
+ required(:routing).schema do
+ required(:builder)
+ required(:subscription_groups_builder)
+ end
+
+ required(:processing).schema do
+ required(:jobs_builder)
+ required(:scheduler)
+ required(:coordinator_class)
+ required(:partitioner_class)
+ end
+
+ required(:active_job).schema do
+ required(:dispatcher)
+ required(:job_options_contract)
+ required(:consumer_class)
+ end
  end
  end

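In other words, the flat internal keys are replaced by three nested namespaces. A settings structure that would satisfy the reworked schema is shaped like this (symbols stand in for the actual component instances and classes; illustration only):

    internal = {
      status: :app_status,
      process: :process_handler,
      routing: {
        builder: :routing_builder,
        subscription_groups_builder: :subscription_groups_builder
      },
      processing: {
        jobs_builder: :jobs_builder,
        scheduler: :scheduler,
        coordinator_class: :coordinator_class,
        partitioner_class: :partitioner_class
      },
      active_job: {
        dispatcher: :dispatcher,
        job_options_contract: :job_options_contract,
        consumer_class: :consumer_class
      }
    }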
data/lib/karafka/contracts/server_cli_options.rb

@@ -12,7 +12,7 @@ module Karafka
  # If there were no consumer_groups declared in the server cli, it means that we will
  # run all of them and no need to validate them here at all
  if !value.nil? &&
- !(value - Karafka::App.config.internal.routing_builder.map(&:name)).empty?
+ !(value - Karafka::App.config.internal.routing.builder.map(&:name)).empty?
  key(:consumer_groups).failure(:consumer_groups_inclusion)
  end
  end
data/lib/karafka/errors.rb

@@ -47,5 +47,8 @@ module Karafka
  # Used to instrument this error into the error notifications
  # We do not raise it so we won't crash deployed systems
  ExpiredLicenseTokenError = Class.new(BaseError)
+
+ # This should never happen. Please open an issue if it does.
+ InvalidCoordinatorState = Class.new(BaseError)
  end
  end
data/lib/karafka/instrumentation/logger_listener.rb

@@ -98,9 +98,6 @@ module Karafka
  details = (error.backtrace || []).join("\n")

  case type
- when 'consumer.prepared.error'
- error "Consumer prepared error: #{error}"
- error details
  when 'consumer.consume.error'
  error "Consumer consuming error: #{error}"
  error details
data/lib/karafka/instrumentation/monitor.rb

@@ -22,7 +22,6 @@ module Karafka
  app.stopping
  app.stopped

- consumer.prepared
  consumer.consumed
  consumer.revoked
  consumer.shutdown
data/lib/karafka/pro/active_job/consumer.rb

@@ -20,26 +20,20 @@ module Karafka
  #
  # It contains slightly better revocation warranties than the regular blocking consumer as
  # it can stop processing batch of jobs in the middle after the revocation.
- class Consumer < Karafka::ActiveJob::Consumer
+ class Consumer < Karafka::Pro::BaseConsumer
  # Runs ActiveJob jobs processing and handles lrj if needed
  def consume
  messages.each do |message|
  # If for any reason we've lost this partition, not worth iterating over new messages
  # as they are no longer ours
- return if revoked?
+ break if revoked?
  break if Karafka::App.stopping?

  ::ActiveJob::Base.execute(
  ::ActiveSupport::JSON.decode(message.raw_payload)
  )

- # We check it twice as the job may be long running
- return if revoked?
-
  mark_as_consumed(message)
-
- # Do not process more if we are shutting down
- break if Karafka::App.stopping?
  end
  end
  end
data/lib/karafka/pro/base_consumer.rb (new file)

@@ -0,0 +1,82 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+ module Pro
+ # Karafka PRO consumer.
+ #
+ # If you use PRO, all your consumers should inherit (indirectly) from it.
+ #
+ # @note In case of using lrj, manual pausing may not be the best idea as resume needs to happen
+ # after each batch is processed.
+ class BaseConsumer < Karafka::BaseConsumer
+ # Pause for tops 31 years
+ MAX_PAUSE_TIME = 1_000_000_000_000
+
+ private_constant :MAX_PAUSE_TIME
+
+ # Pauses processing of a given partition until we're done with the processing
+ # This ensures, that we can easily poll not reaching the `max.poll.interval`
+ def on_before_consume
+ return unless topic.long_running_job?
+
+ # This ensures, that when running LRJ with VP, things operate as expected
+ coordinator.on_started do |first_group_message|
+ # Pause at the first message in a batch. That way in case of a crash, we will not loose
+ # any messages
+ pause(first_group_message.offset, MAX_PAUSE_TIME)
+ end
+ end
+
+ # Runs extra logic after consumption that is related to handling long running jobs
+ # @note This overwrites the '#on_after_consume' from the base consumer
+ def on_after_consume
+ coordinator.on_finished do |first_group_message, last_group_message|
+ on_after_consume_regular(first_group_message, last_group_message)
+ end
+ end
+
+ private
+
+ # Handles the post-consumption flow depending on topic settings
+ #
+ # @param first_message [Karafka::Messages::Message]
+ # @param last_message [Karafka::Messages::Message]
+ def on_after_consume_regular(first_message, last_message)
+ if coordinator.success?
+ coordinator.pause_tracker.reset
+
+ # We use the non-blocking one here. If someone needs the blocking one, can implement it
+ # with manual offset management
+ # Mark as consumed only if manual offset management is not on
+ mark_as_consumed(last_message) unless topic.manual_offset_management? || revoked?
+
+ # If this is not a long running job there is nothing for us to do here
+ return unless topic.long_running_job?
+
+ # Once processing is done, we move to the new offset based on commits
+ # Here, in case manual offset management is off, we have the new proper offset of a
+ # first message from another batch from `@seek_offset`. If manual offset management
+ # is on, we move to place where the user indicated it was finished. This can create an
+ # interesting (yet valid) corner case, where with manual offset management on and no
+ # marking as consumed, we end up with an infinite loop processing same messages over and
+ # over again
+ seek(@seek_offset || first_message.offset)
+
+ resume
+ else
+ # If processing failed, we need to pause
+ pause(@seek_offset || first_message.offset)
+ end
+ end
+ end
+ end
+ end
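Putting the Pro base consumer to use: with a topic flagged as a long running job (the Pro routing extension adds `long_running_job?` to topics, so a `long_running_job true` routing setting is assumed here), the partition is paused around the whole batch and resumed in `on_after_consume`, which keeps `max.poll.interval.ms` from being exceeded. A hedged sketch with hypothetical class and topic names:

    # routing (assumed DSL based on the routing extension in this release):
    #   topic :reports do
    #     consumer ReportsConsumer
    #     long_running_job true
    #   end
    class ReportsConsumer < Karafka::Pro::BaseConsumer
      def consume
        messages.each do |message|
          # This work may legitimately take longer than max.poll.interval.ms, because
          # the partition is paused while the batch is being processed
          Reports::Builder.call(message.payload)

          # If the partition was lost mid-batch, stop early; the coordinator and
          # after-consume flow handle the rest
          return if revoked?
        end
      end
    end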
data/lib/karafka/pro/loader.rb

@@ -15,11 +15,13 @@ module Karafka
  class Loader
  # All the pro components that need to be loaded
  COMPONENTS = %w[
+ base_consumer
  performance_tracker
- scheduler
- base_consumer_extensions
+ processing/scheduler
  processing/jobs/consume_non_blocking
  processing/jobs_builder
+ processing/coordinator
+ processing/partitioner
  routing/extensions
  active_job/consumer
  active_job/dispatcher
@@ -35,14 +37,18 @@ module Karafka
  def setup(config)
  COMPONENTS.each { |component| require_relative(component) }

- config.internal.scheduler = Scheduler.new
- config.internal.jobs_builder = Processing::JobsBuilder.new
- config.internal.active_job.consumer = ActiveJob::Consumer
- config.internal.active_job.dispatcher = ActiveJob::Dispatcher.new
- config.internal.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
+ icfg = config.internal
+
+ icfg.processing.coordinator_class = Processing::Coordinator
+ icfg.processing.partitioner_class = Processing::Partitioner
+ icfg.processing.scheduler = Processing::Scheduler.new
+ icfg.processing.jobs_builder = Processing::JobsBuilder.new
+
+ icfg.active_job.consumer_class = ActiveJob::Consumer
+ icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
+ icfg.active_job.job_options_contract = ActiveJob::JobOptionsContract.new

  ::Karafka::Routing::Topic.include(Routing::Extensions)
- ::Karafka::BaseConsumer.prepend(BaseConsumerExtensions)

  config.monitor.subscribe(PerformanceTracker.instance)
  end
data/lib/karafka/pro/processing/coordinator.rb (new file)

@@ -0,0 +1,63 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module Pro
+ module Processing
+ # Pro coordinator that provides extra orchestration methods useful for parallel processing
+ # within the same partition
+ class Coordinator < ::Karafka::Processing::Coordinator
+ # @param args [Object] anything the base coordinator accepts
+ def initialize(*args)
+ super
+ @on_started_invoked = false
+ @on_finished_invoked = false
+ @flow_lock = Mutex.new
+ end
+
+ # Starts the coordination process
+ # @param messages [Array<Karafka::Messages::Message>] messages for which processing we are
+ # going to coordinate.
+ def start(messages)
+ super
+
+ @mutex.synchronize do
+ @on_started_invoked = false
+ @on_finished_invoked = false
+ @first_message = messages.first
+ @last_message = messages.last
+ end
+ end
+
+ # @return [Boolean] is the coordinated work finished or not
+ def finished?
+ @running_jobs.zero?
+ end
+
+ # Runs given code only once per all the coordinated jobs upon starting first of them
+ def on_started
+ @flow_lock.synchronize do
+ return if @on_started_invoked
+
+ @on_started_invoked = true
+
+ yield(@first_message, @last_message)
+ end
+ end
+
+ # Runs once when all the work that is suppose to be coordinated is finished
+ # It runs once per all the coordinated jobs and should be used to run any type of post
+ # jobs coordination processing execution
+ def on_finished
+ @flow_lock.synchronize do
+ return unless finished?
+ return if @on_finished_invoked
+
+ @on_finished_invoked = true
+
+ yield(@first_message, @last_message)
+ end
+ end
+ end
+ end
+ end
+ end
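To make the once-per-batch semantics concrete: all jobs built from one partition batch share a single coordinator, `on_started` runs its block only for the first job that reaches it, and `on_finished` runs its block only after the last running job has finished. An illustrative (non-gem) walkthrough; `increment` appears in the listener hunk above, while `decrement` on the base coordinator is assumed here:

    coordinator.start(messages)

    # The listener counts one running job per yielded message group
    jobs.each { coordinator.increment }

    jobs.each do |job|
      # Runs exactly once, for the first job that gets here (e.g. pausing the partition)
      coordinator.on_started { |first, _last| puts "starting at offset #{first.offset}" }

      job.call
      coordinator.decrement

      # No-op until the last job has decremented the running counter, then runs once
      coordinator.on_finished { |first, last| puts "done: #{first.offset}..#{last.offset}" }
    end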
data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb

@@ -26,7 +26,7 @@ module Karafka
  # management. This layer of the framework knows nothing about Kafka messages consumption.
  class ConsumeNonBlocking < ::Karafka::Processing::Jobs::Consume
  # Releases the blocking lock after it is done with the preparation phase for this job
- def prepare
+ def before_call
  super
  @non_blocking = true
  end