karafka 2.0.41 → 2.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +2 -2
  4. data/CHANGELOG.md +36 -1
  5. data/Gemfile.lock +17 -17
  6. data/README.md +2 -2
  7. data/config/locales/errors.yml +10 -0
  8. data/config/locales/pro_errors.yml +0 -2
  9. data/docker-compose.yml +2 -2
  10. data/karafka.gemspec +2 -2
  11. data/lib/karafka/active_job/consumer.rb +16 -11
  12. data/lib/karafka/active_job/current_attributes/loading.rb +36 -0
  13. data/lib/karafka/active_job/current_attributes/persistence.rb +28 -0
  14. data/lib/karafka/active_job/current_attributes.rb +42 -0
  15. data/lib/karafka/active_job/dispatcher.rb +8 -2
  16. data/lib/karafka/base_consumer.rb +1 -1
  17. data/lib/karafka/connection/client.rb +3 -1
  18. data/lib/karafka/errors.rb +3 -0
  19. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +176 -0
  20. data/lib/karafka/messages/batch_metadata.rb +9 -2
  21. data/lib/karafka/pro/active_job/consumer.rb +1 -10
  22. data/lib/karafka/pro/active_job/dispatcher.rb +2 -2
  23. data/lib/karafka/pro/processing/coordinator.rb +20 -1
  24. data/lib/karafka/pro/processing/filters/virtual_limiter.rb +52 -0
  25. data/lib/karafka/pro/processing/filters_applier.rb +4 -0
  26. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +1 -1
  27. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +3 -1
  28. data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +2 -2
  29. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +2 -0
  30. data/lib/karafka/pro/processing/strategies/aj/mom_vp.rb +1 -1
  31. data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +1 -1
  32. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +3 -6
  33. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom_vp.rb +43 -0
  34. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_vp.rb +1 -0
  35. data/lib/karafka/pro/processing/strategies/dlq/ftr_mom.rb +3 -7
  36. data/lib/karafka/pro/processing/strategies/dlq/ftr_mom_vp.rb +41 -0
  37. data/lib/karafka/pro/processing/strategies/dlq/ftr_vp.rb +1 -0
  38. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +3 -6
  39. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom_vp.rb +36 -0
  40. data/lib/karafka/pro/processing/strategies/dlq/lrj_vp.rb +1 -0
  41. data/lib/karafka/pro/processing/strategies/dlq/mom.rb +8 -7
  42. data/lib/karafka/pro/processing/strategies/dlq/mom_vp.rb +37 -0
  43. data/lib/karafka/pro/processing/strategies/lrj/default.rb +2 -0
  44. data/lib/karafka/pro/processing/strategies/lrj/ftr_mom_vp.rb +40 -0
  45. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +2 -0
  46. data/lib/karafka/pro/processing/strategies/lrj/mom_vp.rb +38 -0
  47. data/lib/karafka/pro/processing/strategies/mom/ftr_vp.rb +37 -0
  48. data/lib/karafka/pro/{base_consumer.rb → processing/strategies/mom/vp.rb} +17 -7
  49. data/lib/karafka/pro/processing/strategies/vp/default.rb +51 -0
  50. data/lib/karafka/pro/processing/virtual_offset_manager.rb +147 -0
  51. data/lib/karafka/pro/routing/features/virtual_partitions/contract.rb +0 -17
  52. data/lib/karafka/processing/strategies/default.rb +2 -0
  53. data/lib/karafka/processing/strategies/dlq_mom.rb +9 -7
  54. data/lib/karafka/version.rb +1 -1
  55. data/lib/karafka.rb +5 -0
  56. data.tar.gz.sig +0 -0
  57. metadata +20 -8
  58. metadata.gz.sig +0 -0
  59. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +0 -16
@@ -28,6 +28,44 @@ module Karafka
28
28
  virtual_partitions
29
29
  ].freeze
30
30
 
31
+ # @param message [Karafka::Messages::Message] marks message as consumed
32
+ # @note This virtual offset management uses a regular default marking API underneath.
33
+ # We do not alter the "real" marking API, as VPs are just one of many cases we want
34
+ # to support and we do not want to impact them with collective offsets management
35
+ def mark_as_consumed(message)
36
+ return super if collapsed?
37
+
38
+ manager = coordinator.virtual_offset_manager
39
+
40
+ coordinator.synchronize do
41
+ manager.mark(message)
42
+ # If this is the last marking on a finished flow, we can use the original
43
+ # last message and in order to do so, we need to mark all previous messages as
44
+ # consumed as otherwise the computed offset could be different
45
+ # We mark until our offset just in case of a DLQ flow or similar, where we do not
46
+ # want to mark all but until the expected location
47
+ manager.mark_until(message) if coordinator.finished?
48
+
49
+ return revoked? unless manager.markable?
50
+ end
51
+
52
+ manager.markable? ? super(manager.markable) : revoked?
53
+ end
54
+
55
+ # @param message [Karafka::Messages::Message] blocking marks message as consumed
56
+ def mark_as_consumed!(message)
57
+ return super if collapsed?
58
+
59
+ manager = coordinator.virtual_offset_manager
60
+
61
+ coordinator.synchronize do
62
+ manager.mark(message)
63
+ manager.mark_until(message) if coordinator.finished?
64
+ end
65
+
66
+ manager.markable? ? super(manager.markable) : revoked?
67
+ end
68
+
31
69
  # @return [Boolean] is the virtual processing collapsed in the context of given
32
70
  # consumer.
33
71
  def collapsed?
@@ -45,6 +83,19 @@ module Karafka
45
83
  def failing?
46
84
  coordinator.failure?
47
85
  end
86
+
87
+ private
88
+
89
+ # Prior to adding work to the queue, registers all the messages offsets into the
90
+ # virtual offset group.
91
+ #
92
+ # @note This can be done without the mutex, because it happens from the same thread
93
+ # for all the work (listener thread)
94
+ def handle_before_enqueue
95
+ coordinator.virtual_offset_manager.register(
96
+ messages.map(&:offset)
97
+ )
98
+ end
48
99
  end
49
100
  end
50
101
  end
@@ -0,0 +1,147 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Manager that keeps track of our offsets with the virtualization layer that are local
18
+ # to given partition assignment. It allows for easier offset management for virtual
19
+ # partition cases as it provides us the ability to mark as consumed and move the
20
+ # real offset behind as expected.
21
+ #
22
+ # @note We still use the regular coordinator "real" offset management as we want to have
23
+ # them as separated as possible because the real seek offset management is also used for
24
+ # pausing, filtering and others and should not be impacted by the virtual one
25
+ #
26
+ # @note This manager is **not** thread-safe by itself. It should operate from coordinator
27
+ # locked locations.
28
+ class VirtualOffsetManager
29
+ attr_reader :groups
30
+
31
+ # @param topic [String]
32
+ # @param partition [Integer]
33
+ #
34
+ # @note We need topic and partition because we use a seek message (virtual) for real offset
35
+ # management. We could keep real message reference but this can be memory consuming
36
+ # and not worth it.
37
+ def initialize(topic, partition)
38
+ @topic = topic
39
+ @partition = partition
40
+ @groups = []
41
+ @marked = {}
42
+ @real_offset = -1
43
+ end
44
+
45
+ # Clears the manager for a next collective operation
46
+ def clear
47
+ @groups.clear
48
+ @marked = {}
49
+ @real_offset = -1
50
+ end
51
+
52
+ # Registers an offset group coming from one virtual consumer. In order to move the real
53
+ # underlying offset accordingly, we need to make sure to track the virtual consumers
54
+ # offsets groups independently and only materialize the end result.
55
+ #
56
+ # @param offsets_group [Array<Integer>] offsets from one virtual consumer
57
+ def register(offsets_group)
58
+ @groups << offsets_group
59
+
60
+ offsets_group.each { |offset| @marked[offset] = false }
61
+ end
62
+
63
+ # Marks given message as marked (virtually consumed).
64
+ # We mark given message offset and other earlier offsets from the same group as done
65
+ # and we can refresh our real offset representation based on that as it might have changed
66
+ # to a newer real offset.
67
+ # @param message [Karafka::Messages::Message] message coming from VP we want to mark
68
+ def mark(message)
69
+ offset = message.offset
70
+
71
+ group = @groups.find { |reg_group| reg_group.include?(offset) }
72
+
73
+ # This case can happen when someone uses MoM and wants to mark message from a previous
74
+ # batch as consumed. We can add it, since the real offset refresh will point to it
75
+ unless group
76
+ group = [offset]
77
+ @groups << group
78
+ end
79
+
80
+ position = group.index(offset)
81
+
82
+ # Mark all previous messages from the same group also as virtually consumed
83
+ group[0..position].each do |markable_offset|
84
+ @marked[markable_offset] = true
85
+ end
86
+
87
+ # Recompute the real offset representation
88
+ materialize_real_offset
89
+ end
90
+
91
+ # Mark all from all groups including the `message`.
92
+ # Useful when operating in a collapsed state for marking
93
+ # @param message [Karafka::Messages::Message]
94
+ def mark_until(message)
95
+ mark(message)
96
+
97
+ @groups.each do |group|
98
+ group.each do |offset|
99
+ next if offset > message.offset
100
+
101
+ @marked[offset] = true
102
+ end
103
+ end
104
+
105
+ materialize_real_offset
106
+ end
107
+
108
+ # @return [Array<Integer>] Offsets of messages already marked as consumed virtually
109
+ def marked
110
+ @marked.select { |_, status| status }.map(&:first).sort
111
+ end
112
+
113
+ # Is there a real offset we can mark as consumed
114
+ # @return [Boolean]
115
+ def markable?
116
+ !@real_offset.negative?
117
+ end
118
+
119
+ # @return [Messages::Seek] markable message for real offset marking
120
+ def markable
121
+ raise Errors::InvalidRealOffsetUsage unless markable?
122
+
123
+ Messages::Seek.new(
124
+ @topic,
125
+ @partition,
126
+ @real_offset
127
+ )
128
+ end
129
+
130
+ private
131
+
132
+ # Recomputes the biggest possible real offset we can have.
133
+ # It picks the biggest offset that has an uninterrupted stream of offsets virtually marked as
134
+ # consumed because this will be the collective offset.
135
+ def materialize_real_offset
136
+ @marked.to_a.sort_by(&:first).each do |offset, marked|
137
+ break unless marked
138
+
139
+ @real_offset = offset
140
+ end
141
+
142
+ @real_offset = (@marked.keys.min - 1) if @real_offset.negative?
143
+ end
144
+ end
145
+ end
146
+ end
147
+ end
@@ -44,23 +44,6 @@ module Karafka
44
44
 
45
45
  [[%i[virtual_partitions partitioner], :respond_to_call]]
46
46
  end
47
-
48
- # Make sure that manual offset management is not used together with Virtual Partitions
49
- # This would not make any sense as there would be edge cases related to skipping
50
- # messages even if there were errors.
51
- virtual do |data, errors|
52
- next unless errors.empty?
53
-
54
- virtual_partitions = data[:virtual_partitions]
55
- manual_offset_management = data[:manual_offset_management]
56
- active_job = data[:active_job]
57
-
58
- next unless virtual_partitions[:active]
59
- next unless manual_offset_management[:active]
60
- next if active_job[:active]
61
-
62
- [[%i[manual_offset_management], :not_with_virtual_partitions]]
63
- end
64
47
  end
65
48
  end
66
49
  end
@@ -26,6 +26,7 @@ module Karafka
26
26
  def mark_as_consumed(message)
27
27
  # Ignore earlier offsets than the one we already committed
28
28
  return true if coordinator.seek_offset > message.offset
29
+ return false if revoked?
29
30
 
30
31
  unless client.mark_as_consumed(message)
31
32
  coordinator.revoke
@@ -46,6 +47,7 @@ module Karafka
46
47
  def mark_as_consumed!(message)
47
48
  # Ignore earlier offsets than the one we already committed
48
49
  return true if coordinator.seek_offset > message.offset
50
+ return false if revoked?
49
51
 
50
52
  unless client.mark_as_consumed!(message)
51
53
  coordinator.revoke
@@ -29,16 +29,18 @@ module Karafka
29
29
  # We reset the pause to indicate we will now consider it as "ok".
30
30
  coordinator.pause_tracker.reset
31
31
 
32
- skippable_message, marked = find_skippable_message
32
+ skippable_message, = find_skippable_message
33
33
 
34
34
  dispatch_to_dlq(skippable_message)
35
35
 
36
- # Backoff and move forward
37
- if marked
38
- pause(coordinator.seek_offset, nil, false)
39
- else
40
- pause(skippable_message.offset + 1, nil, false)
41
- end
36
+ # Save the next offset we want to go with after moving given message to DLQ
37
+ # Without this, we would not be able to move forward and we would end up
38
+ # in an infinite loop trying to un-pause from the message we've already processed
39
+ # Of course, since it's a MoM, a rebalance or kill will move it back as no
40
+ # offsets are being committed
41
+ coordinator.seek_offset = skippable_message.offset + 1
42
+
43
+ pause(coordinator.seek_offset, nil, false)
42
44
  end
43
45
  end
44
46
  end
@@ -3,5 +3,5 @@
3
3
  # Main module namespace
4
4
  module Karafka
5
5
  # Current Karafka version
6
- VERSION = '2.0.41'
6
+ VERSION = '2.1.4'
7
7
  end
data/lib/karafka.rb CHANGED
@@ -114,6 +114,11 @@ end
114
114
  loader = Zeitwerk::Loader.for_gem
115
115
  # Do not load Rails extensions by default, this will be handled by Railtie if they are needed
116
116
  loader.ignore(Karafka.gem_root.join('lib/active_job'))
117
+ # Do not load CurrentAttributes components as they will be loaded if needed
118
+ # @note We have to exclude both the .rb file as well as the whole directory so users can require
119
+ # current attributes only when needed
120
+ loader.ignore(Karafka.gem_root.join('lib/karafka/active_job/current_attributes'))
121
+ loader.ignore(Karafka.gem_root.join('lib/karafka/active_job/current_attributes.rb'))
117
122
  # Do not load Railtie. It will load after everything is ready, so we don't have to load any
118
123
  # Karafka components when we require this railtie. Railtie needs to be loaded last.
119
124
  loader.ignore(Karafka.gem_root.join('lib/karafka/railtie'))
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.41
4
+ version: 2.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
35
35
  Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
36
36
  MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
37
37
  -----END CERTIFICATE-----
38
- date: 2023-04-19 00:00:00.000000000 Z
38
+ date: 2023-06-06 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: karafka-core
@@ -43,7 +43,7 @@ dependencies:
43
43
  requirements:
44
44
  - - ">="
45
45
  - !ruby/object:Gem::Version
46
- version: 2.0.12
46
+ version: 2.0.13
47
47
  - - "<"
48
48
  - !ruby/object:Gem::Version
49
49
  version: 3.0.0
@@ -53,7 +53,7 @@ dependencies:
53
53
  requirements:
54
54
  - - ">="
55
55
  - !ruby/object:Gem::Version
56
- version: 2.0.12
56
+ version: 2.0.13
57
57
  - - "<"
58
58
  - !ruby/object:Gem::Version
59
59
  version: 3.0.0
@@ -77,7 +77,7 @@ dependencies:
77
77
  requirements:
78
78
  - - ">="
79
79
  - !ruby/object:Gem::Version
80
- version: 2.4.10
80
+ version: 2.5.3
81
81
  - - "<"
82
82
  - !ruby/object:Gem::Version
83
83
  version: 3.0.0
@@ -87,7 +87,7 @@ dependencies:
87
87
  requirements:
88
88
  - - ">="
89
89
  - !ruby/object:Gem::Version
90
- version: 2.4.10
90
+ version: 2.5.3
91
91
  - - "<"
92
92
  - !ruby/object:Gem::Version
93
93
  version: 3.0.0
@@ -156,6 +156,9 @@ files:
156
156
  - lib/active_job/queue_adapters/karafka_adapter.rb
157
157
  - lib/karafka.rb
158
158
  - lib/karafka/active_job/consumer.rb
159
+ - lib/karafka/active_job/current_attributes.rb
160
+ - lib/karafka/active_job/current_attributes/loading.rb
161
+ - lib/karafka/active_job/current_attributes/persistence.rb
159
162
  - lib/karafka/active_job/dispatcher.rb
160
163
  - lib/karafka/active_job/job_extensions.rb
161
164
  - lib/karafka/active_job/job_options_contract.rb
@@ -197,9 +200,9 @@ files:
197
200
  - lib/karafka/instrumentation/notifications.rb
198
201
  - lib/karafka/instrumentation/proctitle_listener.rb
199
202
  - lib/karafka/instrumentation/vendors/datadog/dashboard.json
200
- - lib/karafka/instrumentation/vendors/datadog/listener.rb
201
203
  - lib/karafka/instrumentation/vendors/datadog/logger_listener.rb
202
204
  - lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb
205
+ - lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb
203
206
  - lib/karafka/licenser.rb
204
207
  - lib/karafka/messages/batch_metadata.rb
205
208
  - lib/karafka/messages/builders/batch_metadata.rb
@@ -216,7 +219,6 @@ files:
216
219
  - lib/karafka/pro/active_job/consumer.rb
217
220
  - lib/karafka/pro/active_job/dispatcher.rb
218
221
  - lib/karafka/pro/active_job/job_options_contract.rb
219
- - lib/karafka/pro/base_consumer.rb
220
222
  - lib/karafka/pro/encryption.rb
221
223
  - lib/karafka/pro/encryption/cipher.rb
222
224
  - lib/karafka/pro/encryption/contracts/config.rb
@@ -233,6 +235,7 @@ files:
233
235
  - lib/karafka/pro/processing/filters/delayer.rb
234
236
  - lib/karafka/pro/processing/filters/expirer.rb
235
237
  - lib/karafka/pro/processing/filters/throttler.rb
238
+ - lib/karafka/pro/processing/filters/virtual_limiter.rb
236
239
  - lib/karafka/pro/processing/filters_applier.rb
237
240
  - lib/karafka/pro/processing/jobs/consume_non_blocking.rb
238
241
  - lib/karafka/pro/processing/jobs/revoked_non_blocking.rb
@@ -262,26 +265,35 @@ files:
262
265
  - lib/karafka/pro/processing/strategies/dlq/ftr.rb
263
266
  - lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb
264
267
  - lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb
268
+ - lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom_vp.rb
265
269
  - lib/karafka/pro/processing/strategies/dlq/ftr_lrj_vp.rb
266
270
  - lib/karafka/pro/processing/strategies/dlq/ftr_mom.rb
271
+ - lib/karafka/pro/processing/strategies/dlq/ftr_mom_vp.rb
267
272
  - lib/karafka/pro/processing/strategies/dlq/ftr_vp.rb
268
273
  - lib/karafka/pro/processing/strategies/dlq/lrj.rb
269
274
  - lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb
275
+ - lib/karafka/pro/processing/strategies/dlq/lrj_mom_vp.rb
270
276
  - lib/karafka/pro/processing/strategies/dlq/lrj_vp.rb
271
277
  - lib/karafka/pro/processing/strategies/dlq/mom.rb
278
+ - lib/karafka/pro/processing/strategies/dlq/mom_vp.rb
272
279
  - lib/karafka/pro/processing/strategies/dlq/vp.rb
273
280
  - lib/karafka/pro/processing/strategies/ftr/default.rb
274
281
  - lib/karafka/pro/processing/strategies/ftr/vp.rb
275
282
  - lib/karafka/pro/processing/strategies/lrj/default.rb
276
283
  - lib/karafka/pro/processing/strategies/lrj/ftr.rb
277
284
  - lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb
285
+ - lib/karafka/pro/processing/strategies/lrj/ftr_mom_vp.rb
278
286
  - lib/karafka/pro/processing/strategies/lrj/ftr_vp.rb
279
287
  - lib/karafka/pro/processing/strategies/lrj/mom.rb
288
+ - lib/karafka/pro/processing/strategies/lrj/mom_vp.rb
280
289
  - lib/karafka/pro/processing/strategies/lrj/vp.rb
281
290
  - lib/karafka/pro/processing/strategies/mom/default.rb
282
291
  - lib/karafka/pro/processing/strategies/mom/ftr.rb
292
+ - lib/karafka/pro/processing/strategies/mom/ftr_vp.rb
293
+ - lib/karafka/pro/processing/strategies/mom/vp.rb
283
294
  - lib/karafka/pro/processing/strategies/vp/default.rb
284
295
  - lib/karafka/pro/processing/strategy_selector.rb
296
+ - lib/karafka/pro/processing/virtual_offset_manager.rb
285
297
  - lib/karafka/pro/routing/features/base.rb
286
298
  - lib/karafka/pro/routing/features/dead_letter_queue.rb
287
299
  - lib/karafka/pro/routing/features/dead_letter_queue/contract.rb
metadata.gz.sig CHANGED
Binary file
@@ -1,16 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative 'metrics_listener'
4
-
5
- module Karafka
6
- module Instrumentation
7
- # Namespace for vendor specific instrumentation
8
- module Vendors
9
- # Datadog specific instrumentation
10
- module Datadog
11
- # Alias to keep backwards compatibility
12
- Listener = MetricsListener
13
- end
14
- end
15
- end
16
- end