karafka 2.3.0 → 2.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.rspec +2 -0
  4. data/CHANGELOG.md +15 -0
  5. data/Gemfile +1 -1
  6. data/Gemfile.lock +22 -22
  7. data/README.md +2 -2
  8. data/bin/integrations +2 -1
  9. data/bin/rspecs +6 -2
  10. data/config/locales/errors.yml +30 -8
  11. data/config/locales/pro_errors.yml +2 -0
  12. data/docker-compose.yml +1 -1
  13. data/lib/karafka/app.rb +14 -0
  14. data/lib/karafka/cli/base.rb +19 -0
  15. data/lib/karafka/cli/server.rb +62 -76
  16. data/lib/karafka/cli/swarm.rb +30 -0
  17. data/lib/karafka/constraints.rb +3 -3
  18. data/lib/karafka/contracts/config.rb +19 -0
  19. data/lib/karafka/errors.rb +12 -0
  20. data/lib/karafka/helpers/async.rb +13 -3
  21. data/lib/karafka/helpers/config_importer.rb +30 -0
  22. data/lib/karafka/instrumentation/logger_listener.rb +31 -0
  23. data/lib/karafka/instrumentation/notifications.rb +9 -0
  24. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +2 -0
  25. data/lib/karafka/instrumentation/vendors/kubernetes/base_listener.rb +72 -0
  26. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +11 -40
  27. data/lib/karafka/instrumentation/vendors/kubernetes/swarm_liveness_listener.rb +54 -0
  28. data/lib/karafka/pro/active_job/job_options_contract.rb +1 -1
  29. data/lib/karafka/pro/base_consumer.rb +16 -0
  30. data/lib/karafka/pro/connection/manager.rb +6 -1
  31. data/lib/karafka/pro/processing/coordinator.rb +13 -3
  32. data/lib/karafka/pro/processing/coordinators/errors_tracker.rb +74 -0
  33. data/lib/karafka/pro/processing/coordinators/filters_applier.rb +107 -0
  34. data/lib/karafka/pro/processing/coordinators/virtual_offset_manager.rb +180 -0
  35. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +5 -7
  36. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +5 -7
  37. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom.rb +8 -10
  38. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +8 -16
  39. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +5 -7
  40. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +5 -7
  41. data/lib/karafka/pro/processing/strategies/aj/dlq_mom.rb +8 -10
  42. data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +7 -9
  43. data/lib/karafka/pro/processing/strategies/dlq/default.rb +36 -10
  44. data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +3 -7
  45. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +4 -8
  46. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +6 -9
  47. data/lib/karafka/pro/processing/strategies/dlq/ftr_mom.rb +5 -15
  48. data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +4 -8
  49. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +6 -9
  50. data/lib/karafka/pro/processing/strategies/dlq/mom.rb +10 -20
  51. data/lib/karafka/pro/processing/strategies/vp/default.rb +7 -0
  52. data/lib/karafka/pro/routing/features/dead_letter_queue/contracts/topic.rb +6 -0
  53. data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +39 -0
  54. data/lib/karafka/pro/swarm/liveness_listener.rb +171 -0
  55. data/lib/karafka/process.rb +27 -1
  56. data/lib/karafka/routing/features/dead_letter_queue/config.rb +2 -0
  57. data/lib/karafka/routing/subscription_group.rb +31 -9
  58. data/lib/karafka/runner.rb +4 -0
  59. data/lib/karafka/server.rb +13 -16
  60. data/lib/karafka/setup/config.rb +41 -2
  61. data/lib/karafka/status.rb +4 -2
  62. data/lib/karafka/swarm/liveness_listener.rb +55 -0
  63. data/lib/karafka/swarm/manager.rb +217 -0
  64. data/lib/karafka/swarm/node.rb +179 -0
  65. data/lib/karafka/swarm/pidfd.rb +131 -0
  66. data/lib/karafka/swarm/supervisor.rb +184 -0
  67. data/lib/karafka/swarm.rb +27 -0
  68. data/lib/karafka/templates/karafka.rb.erb +0 -2
  69. data/lib/karafka/version.rb +1 -1
  70. data/lib/karafka.rb +1 -1
  71. data.tar.gz.sig +0 -0
  72. metadata +17 -4
  73. metadata.gz.sig +0 -0
  74. data/lib/karafka/pro/processing/filters_applier.rb +0 -105
  75. data/lib/karafka/pro/processing/virtual_offset_manager.rb +0 -177
@@ -0,0 +1,180 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      module Coordinators
+        # Manager that keeps track of our offsets with the virtualization layer that are local
+        # to given partition assignment. It allows for easier offset management for virtual
+        # partition cases as it provides us the ability to mark as consumed and move the
+        # real offset behind as expected.
+        #
+        # @note We still use the regular coordinator "real" offset management as we want to have
+        #   them as separate as possible because the real seek offset management is also used for
+        #   pausing, filtering and others and should not be impacted by the virtual one
+        #
+        # @note This manager is **not** thread-safe by itself. It should operate from coordinator
+        #   locked locations.
+        class VirtualOffsetManager
+          attr_reader :groups
+
+          # @param topic [String]
+          # @param partition [Integer]
+          # @param offset_metadata_strategy [Symbol] what metadata should we select. That is,
+          #   should we use the most recent or one picked from the offset that is going to be
+          #   committed
+          #
+          # @note We need topic and partition because we use a seek message (virtual) for real
+          #   offset management. We could keep a real message reference but this can be memory
+          #   consuming and not worth it.
+          def initialize(topic, partition, offset_metadata_strategy)
+            @topic = topic
+            @partition = partition
+            @groups = []
+            @marked = {}
+            @offsets_metadata = {}
+            @real_offset = -1
+            @offset_metadata_strategy = offset_metadata_strategy
+            @current_offset_metadata = nil
+          end
+
+          # Clears the manager for a next collective operation
+          def clear
+            @groups.clear
+            @offsets_metadata.clear
+            @current_offset_metadata = nil
+            @marked.clear
+            @real_offset = -1
+          end
+
+          # Registers an offset group coming from one virtual consumer. In order to move the real
+          # underlying offset accordingly, we need to make sure to track the virtual consumers
+          # offsets groups independently and only materialize the end result.
+          #
+          # @param offsets_group [Array<Integer>] offsets from one virtual consumer
+          def register(offsets_group)
+            @groups << offsets_group
+
+            offsets_group.each { |offset| @marked[offset] = false }
+          end
+
+          # Marks given message as marked (virtually consumed).
+          # We mark given message offset and other earlier offsets from the same group as done
+          # and we can refresh our real offset representation based on that as it might have
+          # changed to a newer real offset.
+          # @param message [Karafka::Messages::Message] message coming from VP we want to mark
+          # @param offset_metadata [String, nil] offset metadata. `nil` if none
+          def mark(message, offset_metadata)
+            offset = message.offset
+
+            # Store metadata when we materialize the most stable offset
+            @offsets_metadata[offset] = offset_metadata
+            @current_offset_metadata = offset_metadata
+
+            group = @groups.find { |reg_group| reg_group.include?(offset) }
+
+            # This case can happen when someone uses MoM and wants to mark message from a previous
+            # batch as consumed. We can add it, since the real offset refresh will point to it
+            unless group
+              group = [offset]
+              @groups << group
+            end
+
+            position = group.index(offset)
+
+            # Mark all previous messages from the same group also as virtually consumed
+            group[0..position].each do |markable_offset|
+              # Set previous messages metadata offset as the offset of higher one for overwrites
+              # unless different metadata was set explicitly
+              @offsets_metadata[markable_offset] ||= offset_metadata
+              @marked[markable_offset] = true
+            end
+
+            # Recompute the real offset representation
+            materialize_real_offset
+          end
+
+          # Mark all from all groups including the `message`.
+          # Useful when operating in a collapsed state for marking
+          # @param message [Karafka::Messages::Message]
+          # @param offset_metadata [String, nil]
+          def mark_until(message, offset_metadata)
+            mark(message, offset_metadata)
+
+            @groups.each do |group|
+              group.each do |offset|
+                next if offset > message.offset
+
+                @offsets_metadata[offset] = offset_metadata
+                @marked[offset] = true
+              end
+            end
+
+            materialize_real_offset
+          end
+
+          # @return [Array<Integer>] Offsets of messages already marked as consumed virtually
+          def marked
+            @marked.select { |_, status| status }.map(&:first).sort
+          end
+
+          # Is there a real offset we can mark as consumed
+          # @return [Boolean]
+          def markable?
+            !@real_offset.negative?
+          end
+
+          # @return [Array<Messages::Seek, String>] markable message for real offset marking and
+          #   its associated metadata
+          def markable
+            raise Errors::InvalidRealOffsetUsageError unless markable?
+
+            offset_metadata = case @offset_metadata_strategy
+                              when :exact
+                                @offsets_metadata.fetch(@real_offset)
+                              when :current
+                                @current_offset_metadata
+                              else
+                                raise Errors::UnsupportedCaseError, @offset_metadata_strategy
+                              end
+
+            [
+              Messages::Seek.new(
+                @topic,
+                @partition,
+                @real_offset
+              ),
+              offset_metadata
+            ]
+          end
+
+          private
+
+          # Recomputes the biggest possible real offset we can have.
+          # It picks the biggest offset that has an uninterrupted stream of virtually marked as
+          # consumed because this will be the collective offset.
+          def materialize_real_offset
+            @marked.to_a.sort_by(&:first).each do |offset, marked|
+              break unless marked
+
+              @real_offset = offset
+            end
+
+            @real_offset = (@marked.keys.min - 1) if @real_offset.negative?
+          end
+        end
+      end
+    end
+  end
+end
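Editor's note: the heart of this new class is the uninterrupted-run rule in #materialize_real_offset: the committable "real" offset may only advance across a gap-free run of virtually consumed offsets. A minimal standalone sketch of that rule (plain Ruby, independent of Karafka; the sample offsets are hypothetical):

    # `marked` maps offsets to whether some virtual consumer finished them.
    marked = { 0 => true, 1 => true, 2 => false, 3 => true }

    real_offset = -1

    marked.to_a.sort_by(&:first).each do |offset, done|
      # The first gap stops the scan: offset 3 is done, but 2 is not, so we
      # cannot commit past 1 without losing message 2 on a rebalance/restart.
      break unless done

      real_offset = offset
    end

    puts real_offset # => 1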
@@ -51,14 +51,12 @@ module Karafka
             else
               resume
             end
-          elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
-            retry_after_pause
           else
-            coordinator.pause_tracker.reset
-            skippable_message, = find_skippable_message
-            dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
-            mark_as_consumed(skippable_message)
-            pause(coordinator.seek_offset, nil, false)
+            apply_dlq_flow do
+              skippable_message, = find_skippable_message
+              dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
+              mark_as_consumed(skippable_message)
+            end
           end
         end
       end
@@ -57,14 +57,12 @@ module Karafka
             else
               resume
             end
-          elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
-            retry_after_pause
           else
-            coordinator.pause_tracker.reset
-            skippable_message, = find_skippable_message
-            dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
-            mark_as_consumed(skippable_message)
-            pause(coordinator.seek_offset, nil, false)
+            apply_dlq_flow do
+              skippable_message, = find_skippable_message
+              dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
+              mark_as_consumed(skippable_message)
+            end
           end
         end
       end
@@ -44,8 +44,6 @@ module Karafka
             return if coordinator.manual_pause?

             handle_post_filtering
-          elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
-            retry_after_pause
           # If we've reached number of retries that we could, we need to skip the first
           # message that was not marked as consumed, pause and continue, while also moving
           # this message to the dead topic.
@@ -53,14 +51,14 @@ module Karafka
           # For a Mom setup, this means, that user has to manage the checkpointing by
           # himself. If no checkpointing is ever done, we end up with an endless loop.
           else
-            coordinator.pause_tracker.reset
-            skippable_message, = find_skippable_message
-            dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
-            # We can commit the offset here because we know that we skip it "forever" and
-            # since AJ consumer commits the offset after each job, we also know that the
-            # previous job was successful
-            mark_as_consumed(skippable_message)
-            pause(coordinator.seek_offset, nil, false)
+            apply_dlq_flow do
+              skippable_message, = find_skippable_message
+              dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
+              # We can commit the offset here because we know that we skip it "forever" and
+              # since AJ consumer commits the offset after each job, we also know that the
+              # previous job was successful
+              mark_as_consumed(skippable_message)
+            end
           end
         end
       end
@@ -48,23 +48,15 @@ module Karafka
             mark_as_consumed(last_group_message)

             handle_post_filtering
-          elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
-            retry_after_pause
-          # If we've reached number of retries that we could, we need to skip the first
-          # message that was not marked as consumed, pause and continue, while also moving
-          # this message to the dead topic.
-          #
-          # For a Mom setup, this means, that user has to manage the checkpointing by
-          # himself. If no checkpointing is ever done, we end up with an endless loop.
           else
-            coordinator.pause_tracker.reset
-            skippable_message, = find_skippable_message
-            dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
-            # We can commit the offset here because we know that we skip it "forever" and
-            # since AJ consumer commits the offset after each job, we also know that the
-            # previous job was successful
-            mark_as_consumed(skippable_message)
-            pause(coordinator.seek_offset, nil, false)
+            apply_dlq_flow do
+              skippable_message, = find_skippable_message
+              dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
+              # We can commit the offset here because we know that we skip it "forever" and
+              # since AJ consumer commits the offset after each job, we also know that the
+              # previous job was successful
+              mark_as_consumed(skippable_message)
+            end
           end
         end
       end
@@ -47,14 +47,12 @@ module Karafka
             seek(coordinator.seek_offset, false) unless revoked?

             resume
-          elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
-            retry_after_pause
           else
-            coordinator.pause_tracker.reset
-            skippable_message, = find_skippable_message
-            dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
-            mark_as_consumed(skippable_message)
-            pause(coordinator.seek_offset, nil, false)
+            apply_dlq_flow do
+              skippable_message, = find_skippable_message
+              dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
+              mark_as_consumed(skippable_message)
+            end
           end
         end
       end
@@ -51,14 +51,12 @@ module Karafka
             seek(coordinator.seek_offset, false) unless revoked?

             resume
-          elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
-            retry_after_pause
           else
-            coordinator.pause_tracker.reset
-            skippable_message, = find_skippable_message
-            dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
-            mark_as_consumed(skippable_message)
-            pause(coordinator.seek_offset, nil, false)
+            apply_dlq_flow do
+              skippable_message, = find_skippable_message
+              dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
+              mark_as_consumed(skippable_message)
+            end
           end
         end
       end
@@ -42,17 +42,15 @@ module Karafka
           if coordinator.success?
             # Do NOT commit offsets, they are comitted after each job in the AJ consumer.
             coordinator.pause_tracker.reset
-          elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
-            retry_after_pause
           else
-            coordinator.pause_tracker.reset
-            skippable_message, = find_skippable_message
-            dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
-            # We can commit the offset here because we know that we skip it "forever" and
-            # since AJ consumer commits the offset after each job, we also know that the
-            # previous job was successful
-            mark_as_consumed(skippable_message)
-            pause(coordinator.seek_offset, nil, false)
+            apply_dlq_flow do
+              skippable_message, = find_skippable_message
+              dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
+              # We can commit the offset here because we know that we skip it "forever" and
+              # since AJ consumer commits the offset after each job, we also know that the
+              # previous job was successful
+              mark_as_consumed(skippable_message)
+            end
           end
         end
       end
@@ -48,16 +48,14 @@ module Karafka
             return if revoked?

             mark_as_consumed(last_group_message)
-          elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
-            retry_after_pause
           else
-            # Here we are in a collapsed state, hence we can apply the same logic as
-            # Aj::DlqMom
-            coordinator.pause_tracker.reset
-            skippable_message, = find_skippable_message
-            dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
-            mark_as_consumed(skippable_message)
-            pause(coordinator.seek_offset, nil, false)
+            apply_dlq_flow do
+              # Here we are in a collapsed state, hence we can apply the same logic as
+              # Aj::DlqMom
+              skippable_message, = find_skippable_message
+              dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
+              mark_as_consumed(skippable_message)
+            end
           end
         end
       end
@@ -76,16 +76,10 @@ module Karafka
             return if coordinator.manual_pause?

             mark_as_consumed(last_group_message)
-          elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
-            retry_after_pause
-          # If we've reached number of retries that we could, we need to skip the first
-          # message that was not marked as consumed, pause and continue, while also moving
-          # this message to the dead topic
           else
-            # We reset the pause to indicate we will now consider it as "ok".
-            coordinator.pause_tracker.reset
-            dispatch_if_needed_and_mark_as_consumed
-            pause(coordinator.seek_offset, nil, false)
+            apply_dlq_flow do
+              dispatch_if_needed_and_mark_as_consumed
+            end
           end
         end
       end
@@ -183,7 +177,10 @@ module Karafka
         # topic is set to false, we will skip the dispatch, effectively ignoring the broken
         # message without taking any action.
         def dispatch_to_dlq?
-          topic.dead_letter_queue.topic
+          return false unless topic.dead_letter_queue.topic
+          return false unless @_dispatch_to_dlq
+
+          true
         end

         # @return [Boolean] should we use a transaction to move the data to the DLQ.
@@ -192,6 +189,35 @@ module Karafka
         def dispatch_in_a_transaction?
           producer.transactional? && topic.dead_letter_queue.transactional?
         end
+
+        # Runs the DLQ strategy and, based on its result, performs certain operations
+        #
+        # In case of `:skip` and `:dispatch` it will run the exact flow provided in the block
+        # In case of `:retry` always `#retry_after_pause` is applied
+        def apply_dlq_flow
+          flow = topic.dead_letter_queue.strategy.call(errors_tracker, attempt)
+
+          case flow
+          when :retry
+            retry_after_pause
+
+            return
+          when :skip
+            @_dispatch_to_dlq = false
+          when :dispatch
+            @_dispatch_to_dlq = true
+          else
+            raise Karafka::Errors::UnsupportedCaseError, flow
+          end
+
+          # We reset the pause to indicate we will now consider it as "ok".
+          coordinator.pause_tracker.reset
+
+          yield
+
+          # Always backoff after DLQ dispatch even on skip to prevent overloads on errors
+          pause(coordinator.seek_offset, nil, false)
+        end
       end
     end
   end
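Editor's note: the new #apply_dlq_flow delegates the retry/skip/dispatch decision to a per-topic strategy object that must respond to call(errors_tracker, attempt) and return :retry, :skip or :dispatch. A hypothetical custom strategy honoring that contract (the class name and retry threshold are illustrative, not part of the gem):

    # Retries a few times, then routes the failing message to the DLQ.
    class SimpleRetryStrategy
      MAX_RETRIES = 3

      # @param _errors_tracker [Object] errors collected for this partition
      #   (its API lives in coordinators/errors_tracker.rb, not shown in this hunk)
      # @param attempt [Integer] current processing attempt for this message
      # @return [Symbol] one of the flows understood by apply_dlq_flow
      def call(_errors_tracker, attempt)
        attempt <= MAX_RETRIES ? :retry : :dispatch
      end
    end

Note that even on :skip the flow still pauses via pause(coordinator.seek_offset, nil, false), so a stream of failing messages cannot spin the consumer at full speed.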
@@ -42,14 +42,10 @@ module Karafka
             mark_as_consumed(last_group_message)

             handle_post_filtering
-          elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
-            retry_after_pause
           else
-            coordinator.pause_tracker.reset
-
-            dispatch_if_needed_and_mark_as_consumed
-
-            pause(coordinator.seek_offset, nil, false)
+            apply_dlq_flow do
+              dispatch_if_needed_and_mark_as_consumed
+            end
           end
         end
       end
@@ -53,16 +53,12 @@ module Karafka
             else
               resume
             end
-          elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
-            retry_after_pause
           else
-            coordinator.pause_tracker.reset
-
-            return resume if revoked?
+            apply_dlq_flow do
+              return resume if revoked?

-            dispatch_if_needed_and_mark_as_consumed
-
-            pause(coordinator.seek_offset, nil, false)
+              dispatch_if_needed_and_mark_as_consumed
+            end
           end
         end
       end
@@ -48,18 +48,15 @@ module Karafka
             else
               resume
             end
-          elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
-            retry_after_pause
           else
-            coordinator.pause_tracker.reset
-
-            return resume if revoked?
+            apply_dlq_flow do
+              return resume if revoked?

-            skippable_message, _marked = find_skippable_message
-            dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
+              skippable_message, _marked = find_skippable_message
+              dispatch_to_dlq(skippable_message) if dispatch_to_dlq?

-            coordinator.seek_offset = skippable_message.offset + 1
-            pause(coordinator.seek_offset, nil, false)
+              coordinator.seek_offset = skippable_message.offset + 1
+            end
           end
         end
       end
@@ -41,23 +41,13 @@ module Karafka
             return if coordinator.manual_pause?

             handle_post_filtering
-          elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
-            retry_after_pause
-          # If we've reached number of retries that we could, we need to skip the first
-          # message that was not marked as consumed, pause and continue, while also moving
-          # this message to the dead topic.
-          #
-          # For a Mom setup, this means, that user has to manage the checkpointing by
-          # himself. If no checkpointing is ever done, we end up with an endless loop.
           else
-            # We reset the pause to indicate we will now consider it as "ok".
-            coordinator.pause_tracker.reset
-
-            skippable_message, _marked = find_skippable_message
-            dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
+            apply_dlq_flow do
+              skippable_message, _marked = find_skippable_message
+              dispatch_to_dlq(skippable_message) if dispatch_to_dlq?

-            coordinator.seek_offset = skippable_message.offset + 1
-            pause(coordinator.seek_offset, nil, false)
+              coordinator.seek_offset = skippable_message.offset + 1
+            end
           end
         end
       end
@@ -42,16 +42,12 @@ module Karafka
             seek(coordinator.seek_offset, false) unless revoked? || coordinator.manual_seek?

             resume
-          elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
-            retry_after_pause
           else
-            coordinator.pause_tracker.reset
-
-            return resume if revoked?
-
-            dispatch_if_needed_and_mark_as_consumed
+            apply_dlq_flow do
+              return resume if revoked?

-            pause(coordinator.seek_offset, nil, false)
+              dispatch_if_needed_and_mark_as_consumed
+            end
           end
         end
       end
@@ -42,18 +42,15 @@ module Karafka
             end

             resume
-          elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
-            retry_after_pause
           else
-            coordinator.pause_tracker.reset
-
-            return resume if revoked?
+            apply_dlq_flow do
+              return resume if revoked?

-            skippable_message, _marked = find_skippable_message
-            dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
+              skippable_message, _marked = find_skippable_message
+              dispatch_to_dlq(skippable_message) if dispatch_to_dlq?

-            coordinator.seek_offset = skippable_message.offset + 1
-            pause(coordinator.seek_offset, nil, false)
+              coordinator.seek_offset = skippable_message.offset + 1
+            end
           end
         end
       end
@@ -35,28 +35,18 @@ module Karafka

           if coordinator.success?
             coordinator.pause_tracker.reset
-          elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
-            retry_after_pause
-          # If we've reached number of retries that we could, we need to skip the first
-          # message that was not marked as consumed, pause and continue, while also moving
-          # this message to the dead topic.
-          #
-          # For a Mom setup, this means, that user has to manage the checkpointing by
-          # himself. If no checkpointing is ever done, we end up with an endless loop.
           else
-            # We reset the pause to indicate we will now consider it as "ok".
-            coordinator.pause_tracker.reset
-
-            skippable_message, = find_skippable_message
-            dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
+            apply_dlq_flow do
+              skippable_message, = find_skippable_message
+              dispatch_to_dlq(skippable_message) if dispatch_to_dlq?

-            # Save the next offset we want to go with after moving given message to DLQ
-            # Without this, we would not be able to move forward and we would end up
-            # in an infinite loop trying to un-pause from the message we've already processed
-            # Of course, since it's a MoM a rebalance or kill, will move it back as no
-            # offsets are being committed
-            coordinator.seek_offset = skippable_message.offset + 1
-            pause(coordinator.seek_offset, nil, false)
+              # Save the next offset we want to go with after moving given message to DLQ
+              # Without this, we would not be able to move forward and we would end up
+              # in an infinite loop trying to un-pause from the message we've already
+              # processed. Of course, since it's a MoM a rebalance or kill, will move it back
+              # as no offsets are being committed
+              coordinator.seek_offset = skippable_message.offset + 1
+            end
           end
         end
       end
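Editor's note: the seek_offset bump above is the crux of DLQ with MoM: after skipping, the consumer must un-pause one offset past the failed record. A toy illustration of that off-by-one (the offsets are hypothetical):

    failed_offset = 41

    # Pausing/seeking at the failed offset itself would re-fetch offset 41
    # forever; MoM never commits, so only this in-memory bump moves us on.
    seek_offset = failed_offset + 1

    puts seek_offset # => 42, the first record consumed after un-pausing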
@@ -155,6 +155,13 @@ module Karafka
         def handle_before_schedule_consume
           super

+          # We should not register offsets in virtual manager when in collapse as virtual
+          # manager is not used then for offsets materialization.
+          #
+          # If we did so, it would cause increased storage in cases of endless errors
+          # that are being retried in collapse without a DLQ.
+          return if collapsed?
+
           coordinator.virtual_offset_manager.register(
             messages.map(&:offset)
           )
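Editor's note: skipping registration in collapse matters because #register appends a new group on every call. A standalone mirror of that logic (plain Ruby; the sample offsets are hypothetical) shows how retried batches would otherwise accumulate state:

    groups = []
    marked = {}

    # Mirror of VirtualOffsetManager#register as added in this release
    register = lambda do |offsets_group|
      groups << offsets_group
      offsets_group.each { |offset| marked[offset] = false }
    end

    # The same batch retried three times in collapse, without the guard:
    3.times { register.call([100, 101, 102]) }

    puts groups.size # => 3 duplicate groups (marked keys dedupe, groups do not)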