karafka 2.0.26 → 2.0.28

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile.lock CHANGED
@@ -1,19 +1,19 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.0.26)
5
- karafka-core (>= 2.0.8, < 3.0.0)
4
+ karafka (2.0.28)
5
+ karafka-core (>= 2.0.9, < 3.0.0)
6
6
  thor (>= 0.20)
7
- waterdrop (>= 2.4.7, < 3.0.0)
7
+ waterdrop (>= 2.4.9, < 3.0.0)
8
8
  zeitwerk (~> 2.3)
9
9
 
10
10
  GEM
11
11
  remote: https://rubygems.org/
12
12
  specs:
13
- activejob (7.0.4)
14
- activesupport (= 7.0.4)
13
+ activejob (7.0.4.1)
14
+ activesupport (= 7.0.4.1)
15
15
  globalid (>= 0.3.6)
16
- activesupport (7.0.4)
16
+ activesupport (7.0.4.1)
17
17
  concurrent-ruby (~> 1.0, >= 1.0.2)
18
18
  i18n (>= 1.6, < 2)
19
19
  minitest (>= 5.1)
@@ -25,30 +25,30 @@ GEM
25
25
  factory_bot (6.2.1)
26
26
  activesupport (>= 5.0.0)
27
27
  ffi (1.15.5)
28
- globalid (1.0.0)
28
+ globalid (1.0.1)
29
29
  activesupport (>= 5.0)
30
30
  i18n (1.12.0)
31
31
  concurrent-ruby (~> 1.0)
32
- karafka-core (2.0.8)
32
+ karafka-core (2.0.9)
33
33
  concurrent-ruby (>= 1.1)
34
- rdkafka (>= 0.12)
35
- mini_portile2 (2.8.1)
36
- minitest (5.17.0)
37
- rake (13.0.6)
38
- rdkafka (0.12.0)
34
+ karafka-rdkafka (>= 0.12)
35
+ karafka-rdkafka (0.12.0)
39
36
  ffi (~> 1.15)
40
37
  mini_portile2 (~> 2.6)
41
38
  rake (> 12)
39
+ mini_portile2 (2.8.1)
40
+ minitest (5.17.0)
41
+ rake (13.0.6)
42
42
  rspec (3.12.0)
43
43
  rspec-core (~> 3.12.0)
44
44
  rspec-expectations (~> 3.12.0)
45
45
  rspec-mocks (~> 3.12.0)
46
46
  rspec-core (3.12.0)
47
47
  rspec-support (~> 3.12.0)
48
- rspec-expectations (3.12.1)
48
+ rspec-expectations (3.12.2)
49
49
  diff-lcs (>= 1.2.0, < 2.0)
50
50
  rspec-support (~> 3.12.0)
51
- rspec-mocks (3.12.1)
51
+ rspec-mocks (3.12.3)
52
52
  diff-lcs (>= 1.2.0, < 2.0)
53
53
  rspec-support (~> 3.12.0)
54
54
  rspec-support (3.12.0)
@@ -61,14 +61,12 @@ GEM
61
61
  thor (1.2.1)
62
62
  tzinfo (2.0.5)
63
63
  concurrent-ruby (~> 1.0)
64
- waterdrop (2.4.7)
65
- karafka-core (>= 2.0.7, < 3.0.0)
64
+ waterdrop (2.4.9)
65
+ karafka-core (>= 2.0.9, < 3.0.0)
66
66
  zeitwerk (~> 2.3)
67
67
  zeitwerk (2.6.6)
68
68
 
69
69
  PLATFORMS
70
- arm64-darwin-21
71
- x86_64-darwin-21
72
70
  x86_64-linux
73
71
 
74
72
  DEPENDENCIES
@@ -5,7 +5,7 @@ en:
5
5
  virtual_partitions.max_partitions_format: needs to be equal or more than 1
6
6
  manual_offset_management_not_with_virtual_partitions: cannot be used together with Virtual Partitions
7
7
  long_running_job.active_format: needs to be either true or false
8
- dead_letter_queue_not_with_virtual_partitions: cannot be used together with Virtual Partitions
8
+ dead_letter_queue_with_virtual_partitions: when using Dead Letter Queue with Virtual Partitions, at least one retry is required.
9
9
 
10
10
  config:
11
11
  encryption.active_format: 'needs to be either true or false'
data/karafka.gemspec CHANGED
@@ -21,13 +21,11 @@ Gem::Specification.new do |spec|
21
21
  without having to focus on things that are not your business domain.
22
22
  DESC
23
23
 
24
- spec.add_dependency 'karafka-core', '>= 2.0.8', '< 3.0.0'
24
+ spec.add_dependency 'karafka-core', '>= 2.0.9', '< 3.0.0'
25
25
  spec.add_dependency 'thor', '>= 0.20'
26
- spec.add_dependency 'waterdrop', '>= 2.4.7', '< 3.0.0'
26
+ spec.add_dependency 'waterdrop', '>= 2.4.9', '< 3.0.0'
27
27
  spec.add_dependency 'zeitwerk', '~> 2.3'
28
28
 
29
- spec.required_ruby_version = '>= 2.7.0'
30
-
31
29
  if $PROGRAM_NAME.end_with?('gem')
32
30
  spec.signing_key = File.expand_path('~/.ssh/gem-private_key.pem')
33
31
  end
@@ -267,7 +267,7 @@ module Karafka
267
267
  # Start work coordination for this topic partition
268
268
  coordinator.start(messages)
269
269
 
270
- @partitioner.call(topic, messages) do |group_id, partition_messages|
270
+ @partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
271
271
  # Count the job we're going to create here
272
272
  coordinator.increment
273
273
  executor = @executors.find_or_create(topic, partition, group_id)
@@ -36,8 +36,9 @@ module Karafka
36
36
  )
37
37
 
38
38
  # We cannot mark jobs as done after each if there are virtual partitions. Otherwise
39
- # this could create random markings
40
- next if topic.virtual_partitions?
39
+ # this could create random markings.
40
+ # The exception here is the collapsed state where we can move one after another
41
+ next if topic.virtual_partitions? && !collapsed?
41
42
 
42
43
  mark_as_consumed(message)
43
44
  end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Manages the collapse of virtual partitions
18
+ # Since any non-virtual partition is actually a virtual partition of size one, we can use
19
+ # it in a generic manner without having to distinguish between those cases.
20
+ #
21
+ # We need to have a notion of the offset until which we want to collapse because upon pause and retry
22
+ # rdkafka may purge the buffer. This means, that we may end up with smaller or bigger
23
+ # (different) dataset and without tracking the end of collapse, there would be a chance for
24
+ # things to flicker. Tracking allows us to ensure, that collapse is happening until all the
25
+ # messages from the corrupted batch are processed.
26
+ class Collapser
27
+ # When initialized, nothing is collapsed
28
+ def initialize
29
+ @collapsed = false
30
+ @until_offset = -1
31
+ @mutex = Mutex.new
32
+ end
33
+
34
+ # @return [Boolean] Should we collapse into a single consumer
35
+ def collapsed?
36
+ @collapsed
37
+ end
38
+
39
+ # Collapse until the given offset. We keep collapsing until the given offset, or an offset
40
+ # bigger than it, is encountered.
41
+ # @param offset [Integer] offset until which we keep the collapse
42
+ def collapse_until!(offset)
43
+ @mutex.synchronize do
44
+ # We check it here in case after a pause and re-fetch we would get less messages and
45
+ # one of them would cause an error. We do not want to overwrite the offset here unless
46
+ # it is bigger.
47
+ @until_offset = offset if offset > @until_offset
48
+ end
49
+ end
50
+
51
+ # Sets the collapse state based on the first collective offset that we are going to process
52
+ # and makes the decision whether or not we need to still keep the collapse.
53
+ # @param first_offset [Integer] first offset from a collective batch
54
+ def refresh!(first_offset)
55
+ @mutex.synchronize do
56
+ @collapsed = first_offset < @until_offset
57
+ end
58
+ end
59
+ end
60
+ end
61
+ end
62
+ end
@@ -23,6 +23,7 @@ module Karafka
23
23
 
24
24
  @executed = []
25
25
  @flow_lock = Mutex.new
26
+ @collapser = Collapser.new
26
27
  end
27
28
 
28
29
  # Starts the coordination process
@@ -31,12 +32,28 @@ module Karafka
31
32
  def start(messages)
32
33
  super
33
34
 
35
+ @collapser.refresh!(messages.first.offset)
36
+
34
37
  @mutex.synchronize do
35
38
  @executed.clear
36
39
  @last_message = messages.last
37
40
  end
38
41
  end
39
42
 
43
+ # Sets the consumer failure status and additionally starts the collapse until
44
+ #
45
+ # @param consumer [Karafka::BaseConsumer] consumer that failed
46
+ # @param error [StandardError] error from the failure
47
+ def failure!(consumer, error)
48
+ super
49
+ @collapser.collapse_until!(@last_message.offset + 1)
50
+ end
51
+
52
+ # @return [Boolean] are we in a collapsed state at the moment
53
+ def collapsed?
54
+ @collapser.collapsed?
55
+ end
56
+
40
57
  # @return [Boolean] is the coordinated work finished or not
41
58
  def finished?
42
59
  @running_jobs.zero?
@@ -18,15 +18,29 @@ module Karafka
18
18
  class Partitioner < ::Karafka::Processing::Partitioner
19
19
  # @param topic [String] topic name
20
20
  # @param messages [Array<Karafka::Messages::Message>] karafka messages
21
+ # @param coordinator [Karafka::Pro::Processing::Coordinator] processing coordinator that
22
+ # will be used with those messages
21
23
  # @yieldparam [Integer] group id
22
24
  # @yieldparam [Array<Karafka::Messages::Message>] karafka messages
23
- def call(topic, messages)
25
+ def call(topic, messages, coordinator)
24
26
  ktopic = @subscription_group.topics.find(topic)
25
27
 
26
- # We only partition work if we have a virtual partitioner and more than one thread to
27
- # process the data. With one thread it is not worth partitioning the work as the work
28
- # itself will be assigned to one thread (pointless work)
29
- if ktopic.virtual_partitions? && ktopic.virtual_partitions.max_partitions > 1
28
+ # We only partition work if we have:
29
+ # - a virtual partitioner
30
+ # - more than one thread to process the data
31
+ # - collective is not collapsed via coordinator
32
+ #
33
+ # With one thread it is not worth partitioning the work as the work itself will be
34
+ # assigned to one thread (pointless work)
35
+ #
36
+ # We collapse the partitioning on errors because we "regain" full ordering on a batch
37
+ # that potentially contains the data that caused the error.
38
+ #
39
+ # This is great because it allows us to run things without the parallelization that adds
40
+ # a bit of uncertainty and allows us to use DLQ and safely skip messages if needed.
41
+ if ktopic.virtual_partitions? &&
42
+ ktopic.virtual_partitions.max_partitions > 1 &&
43
+ !coordinator.collapsed?
30
44
  # We need to reduce it to the max concurrency, so the group_id is not a direct effect
31
45
  # of the end user action. Otherwise the persistence layer for consumers would cache
32
46
  # it forever and it would cause memory leaks
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ module Strategies
18
+ # ActiveJob enabled
19
+ # DLQ enabled
20
+ # Long-Running Job enabled
21
+ # Manual offset management enabled
22
+ # Virtual Partitions enabled
23
+ #
24
+ # This case is a bit special. Please see `AjDlqMom` for an explanation of how the
25
+ # offset management works in this case.
26
+ module AjDlqLrjMomVp
27
+ include AjDlqMomVp
28
+ include AjLrjMom
29
+
30
+ # Features for this strategy
31
+ FEATURES = %i[
32
+ active_job
33
+ long_running_job
34
+ manual_offset_management
35
+ dead_letter_queue
36
+ virtual_partitions
37
+ ].freeze
38
+
39
+ # This strategy is pretty much as non VP one because of the collapse
40
+ def handle_after_consume
41
+ coordinator.on_finished do |last_group_message|
42
+ if coordinator.success?
43
+ coordinator.pause_tracker.reset
44
+
45
+ return if revoked?
46
+ return if Karafka::App.stopping?
47
+
48
+ # Since we have VP here we do not commit intermediate offsets and need to commit
49
+ # them here. We do commit in collapsed mode but this is generalized.
50
+ mark_as_consumed(last_group_message)
51
+
52
+ seek(coordinator.seek_offset) unless revoked?
53
+
54
+ resume
55
+ elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
56
+ retry_after_pause
57
+ else
58
+ coordinator.pause_tracker.reset
59
+ skippable_message = find_skippable_message
60
+ dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
61
+ mark_as_consumed(skippable_message)
62
+ pause(coordinator.seek_offset, nil, false)
63
+ end
64
+ end
65
+ end
66
+ end
67
+ end
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ module Strategies
18
+ # ActiveJob enabled
19
+ # Manual offset management enabled
20
+ # Virtual Partitions enabled
21
+ module AjDlqMomVp
22
+ include Dlq
23
+ include Vp
24
+ include Default
25
+
26
+ # Features for this strategy
27
+ FEATURES = %i[
28
+ active_job
29
+ dead_letter_queue
30
+ manual_offset_management
31
+ virtual_partitions
32
+ ].freeze
33
+
34
+ # Flow including moving to DLQ in the collapsed mode
35
+ def handle_after_consume
36
+ coordinator.on_finished do |last_group_message|
37
+ if coordinator.success?
38
+ coordinator.pause_tracker.reset
39
+
40
+ # When this is an ActiveJob running via Pro with virtual partitions, we cannot mark
41
+ # intermediate jobs as processed, so as not to mess up the ordering.
42
+ # Only when all the jobs are processed and we did not lose the partition
43
+ # assignment and we are not stopping (Pro ActiveJob has an early break) we can
44
+ # commit offsets on this as only then we can be sure, that all the jobs were
45
+ # processed.
46
+ # For a non virtual partitions case, the flow is regular and state is marked after
47
+ # each successfully processed job
48
+ return if revoked?
49
+ return if Karafka::App.stopping?
50
+
51
+ mark_as_consumed(last_group_message)
52
+ elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
53
+ retry_after_pause
54
+ else
55
+ # Here we are in a collapsed state, hence we can apply the same logic as AjDlqMom
56
+ coordinator.pause_tracker.reset
57
+ skippable_message = find_skippable_message
58
+ dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
59
+ mark_as_consumed(skippable_message)
60
+ pause(coordinator.seek_offset, nil, false)
61
+ end
62
+ end
63
+ end
64
+ end
65
+ end
66
+ end
67
+ end
68
+ end
@@ -21,6 +21,7 @@ module Karafka
21
21
  # Virtual Partitions enabled
22
22
  module AjLrjMomVp
23
23
  include Default
24
+ include Vp
24
25
 
25
26
  # Features for this strategy
26
27
  FEATURES = %i[
@@ -19,6 +19,7 @@ module Karafka
19
19
  # Manual offset management enabled
20
20
  # Virtual Partitions enabled
21
21
  module AjMomVp
22
+ include Vp
22
23
  include Default
23
24
 
24
25
  # Features for this strategy
@@ -42,6 +43,9 @@ module Karafka
42
43
  # processed.
43
44
  # For a non virtual partitions case, the flow is regular and state is marked after
44
45
  # each successfully processed job
46
+ #
47
+ # We can mark and we do mark intermediate jobs in the collapsed mode when running
48
+ # VPs
45
49
  return if revoked?
46
50
  return if Karafka::App.stopping?
47
51
 
@@ -53,10 +53,10 @@ module Karafka
53
53
  end
54
54
 
55
55
  # Mark job as successful
56
- coordinator.consumption(self).success!
56
+ coordinator.success!(self)
57
57
  rescue StandardError => e
58
58
  # If failed, mark as failed
59
- coordinator.consumption(self).failure!(e)
59
+ coordinator.failure!(self, e)
60
60
 
61
61
  # Re-raise so reported in the consumer
62
62
  raise e
@@ -65,13 +65,8 @@ module Karafka
65
65
  # dispatch to DLQ
66
66
  def dispatch_to_dlq(skippable_message)
67
67
  producer.produce_async(
68
- topic: topic.dead_letter_queue.topic,
69
- payload: skippable_message.raw_payload,
70
- key: skippable_message.partition.to_s,
71
- headers: skippable_message.headers.merge(
72
- 'original_topic' => topic.name,
73
- 'original_partition' => skippable_message.partition.to_s,
74
- 'original_offset' => skippable_message.offset.to_s
68
+ build_dlq_message(
69
+ skippable_message
75
70
  )
76
71
  )
77
72
 
@@ -83,6 +78,35 @@ module Karafka
83
78
  )
84
79
  end
85
80
 
81
+ # @param skippable_message [Array<Karafka::Messages::Message>]
82
+ # @return [Hash] dispatch DLQ message
83
+ def build_dlq_message(skippable_message)
84
+ original_partition = skippable_message.partition.to_s
85
+
86
+ dlq_message = {
87
+ topic: topic.dead_letter_queue.topic,
88
+ key: original_partition,
89
+ payload: skippable_message.raw_payload,
90
+ headers: skippable_message.headers.merge(
91
+ 'original_topic' => topic.name,
92
+ 'original_partition' => original_partition,
93
+ 'original_offset' => skippable_message.offset.to_s,
94
+ 'original_consumer_group' => topic.consumer_group.id
95
+ )
96
+ }
97
+
98
+ # Optional method user can define in consumer to enhance the dlq message hash with
99
+ # some extra details if needed or to replace payload, etc
100
+ if respond_to?(:enhance_dlq_message, true)
101
+ enhance_dlq_message(
102
+ dlq_message,
103
+ skippable_message
104
+ )
105
+ end
106
+
107
+ dlq_message
108
+ end
109
+
86
110
  # @return [Boolean] should we dispatch the message to DLQ or not. When the dispatch topic
87
111
  # is set to false, we will skip the dispatch, effectively ignoring the broken message
88
112
  # without taking any action.
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ module Strategies
18
+ # Dead-Letter Queue enabled
19
+ # Long-Running Job enabled
20
+ # Virtual Partitions enabled
21
+ module DlqLrjVp
22
+ # Same flow as the Dlq Lrj because VP collapses on errors, so DlqLrj can kick in
23
+ include Vp
24
+ include DlqLrj
25
+
26
+ # Features for this strategy
27
+ FEATURES = %i[
28
+ dead_letter_queue
29
+ long_running_job
30
+ virtual_partitions
31
+ ].freeze
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ module Strategies
18
+ # Dead Letter Queue enabled
19
+ # Virtual Partitions enabled
20
+ #
21
+ # In general because we collapse processing in virtual partitions to one on errors, there
22
+ # is no special action that needs to be taken because we guarantee that even with VPs
23
+ # on errors a retry collapses into a single state.
24
+ module DlqVp
25
+ # Features for this strategy
26
+ FEATURES = %i[
27
+ dead_letter_queue
28
+ virtual_partitions
29
+ ].freeze
30
+
31
+ include Dlq
32
+ include Vp
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
@@ -19,6 +19,7 @@ module Karafka
19
19
  # Virtual Partitions enabled
20
20
  module LrjVp
21
21
  # Same flow as the standard Lrj
22
+ include Vp
22
23
  include Lrj
23
24
 
24
25
  # Features for this strategy
@@ -25,6 +25,11 @@ module Karafka
25
25
  FEATURES = %i[
26
26
  virtual_partitions
27
27
  ].freeze
28
+
29
+ # @return [Boolean] is the virtual processing collapsed in the context of given consumer.
30
+ def collapsed?
31
+ coordinator.collapsed?
32
+ end
28
33
  end
29
34
  end
30
35
  end
@@ -26,10 +26,10 @@ module Karafka
26
26
  ).fetch('en').fetch('validations').fetch('topic')
27
27
  end
28
28
 
29
- # Make sure that we don't use DLQ with VP
30
- # Using those two would cause many issues because the offset within VP is not
31
- # manageable so in scenarios where we would fail on the last message, we would move by
32
- # one and try again and fail, and move by one and try again and fail and so on...
29
+ # Make sure that when we use virtual partitions with DLQ, at least one retry is set
30
+ # We cannot use VP with DLQ without retries because, in order to provide ordering
31
+ # guarantees on errors with VP, we need to collapse the VPs concurrency and retry
32
+ # without any nondeterministic work
33
33
  virtual do |data, errors|
34
34
  next unless errors.empty?
35
35
 
@@ -38,8 +38,9 @@ module Karafka
38
38
 
39
39
  next unless dead_letter_queue[:active]
40
40
  next unless virtual_partitions[:active]
41
+ next if dead_letter_queue[:max_retries].positive?
41
42
 
42
- [[%i[dead_letter_queue], :not_with_virtual_partitions]]
43
+ [[%i[dead_letter_queue], :with_virtual_partitions]]
43
44
  end
44
45
  end
45
46
  end