karafka 2.3.0 → 2.3.2

This diff represents the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions exactly as they appear in the public registry.
Files changed (75)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.rspec +2 -0
  4. data/CHANGELOG.md +15 -0
  5. data/Gemfile +1 -1
  6. data/Gemfile.lock +22 -22
  7. data/README.md +2 -2
  8. data/bin/integrations +2 -1
  9. data/bin/rspecs +6 -2
  10. data/config/locales/errors.yml +30 -8
  11. data/config/locales/pro_errors.yml +2 -0
  12. data/docker-compose.yml +1 -1
  13. data/lib/karafka/app.rb +14 -0
  14. data/lib/karafka/cli/base.rb +19 -0
  15. data/lib/karafka/cli/server.rb +62 -76
  16. data/lib/karafka/cli/swarm.rb +30 -0
  17. data/lib/karafka/constraints.rb +3 -3
  18. data/lib/karafka/contracts/config.rb +19 -0
  19. data/lib/karafka/errors.rb +12 -0
  20. data/lib/karafka/helpers/async.rb +13 -3
  21. data/lib/karafka/helpers/config_importer.rb +30 -0
  22. data/lib/karafka/instrumentation/logger_listener.rb +31 -0
  23. data/lib/karafka/instrumentation/notifications.rb +9 -0
  24. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +2 -0
  25. data/lib/karafka/instrumentation/vendors/kubernetes/base_listener.rb +72 -0
  26. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +11 -40
  27. data/lib/karafka/instrumentation/vendors/kubernetes/swarm_liveness_listener.rb +54 -0
  28. data/lib/karafka/pro/active_job/job_options_contract.rb +1 -1
  29. data/lib/karafka/pro/base_consumer.rb +16 -0
  30. data/lib/karafka/pro/connection/manager.rb +6 -1
  31. data/lib/karafka/pro/processing/coordinator.rb +13 -3
  32. data/lib/karafka/pro/processing/coordinators/errors_tracker.rb +74 -0
  33. data/lib/karafka/pro/processing/coordinators/filters_applier.rb +107 -0
  34. data/lib/karafka/pro/processing/coordinators/virtual_offset_manager.rb +180 -0
  35. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +5 -7
  36. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +5 -7
  37. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom.rb +8 -10
  38. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +8 -16
  39. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +5 -7
  40. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +5 -7
  41. data/lib/karafka/pro/processing/strategies/aj/dlq_mom.rb +8 -10
  42. data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +7 -9
  43. data/lib/karafka/pro/processing/strategies/dlq/default.rb +36 -10
  44. data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +3 -7
  45. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +4 -8
  46. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +6 -9
  47. data/lib/karafka/pro/processing/strategies/dlq/ftr_mom.rb +5 -15
  48. data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +4 -8
  49. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +6 -9
  50. data/lib/karafka/pro/processing/strategies/dlq/mom.rb +10 -20
  51. data/lib/karafka/pro/processing/strategies/vp/default.rb +7 -0
  52. data/lib/karafka/pro/routing/features/dead_letter_queue/contracts/topic.rb +6 -0
  53. data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +39 -0
  54. data/lib/karafka/pro/swarm/liveness_listener.rb +171 -0
  55. data/lib/karafka/process.rb +27 -1
  56. data/lib/karafka/routing/features/dead_letter_queue/config.rb +2 -0
  57. data/lib/karafka/routing/subscription_group.rb +31 -9
  58. data/lib/karafka/runner.rb +4 -0
  59. data/lib/karafka/server.rb +13 -16
  60. data/lib/karafka/setup/config.rb +41 -2
  61. data/lib/karafka/status.rb +4 -2
  62. data/lib/karafka/swarm/liveness_listener.rb +55 -0
  63. data/lib/karafka/swarm/manager.rb +217 -0
  64. data/lib/karafka/swarm/node.rb +179 -0
  65. data/lib/karafka/swarm/pidfd.rb +131 -0
  66. data/lib/karafka/swarm/supervisor.rb +184 -0
  67. data/lib/karafka/swarm.rb +27 -0
  68. data/lib/karafka/templates/karafka.rb.erb +0 -2
  69. data/lib/karafka/version.rb +1 -1
  70. data/lib/karafka.rb +1 -1
  71. data.tar.gz.sig +0 -0
  72. metadata +17 -4
  73. metadata.gz.sig +0 -0
  74. data/lib/karafka/pro/processing/filters_applier.rb +0 -105
  75. data/lib/karafka/pro/processing/virtual_offset_manager.rb +0 -177
data/lib/karafka/swarm.rb ADDED
@@ -0,0 +1,27 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Namespace for the Swarm capabilities.
+   #
+   # Karafka in the swarm mode will fork additional processes and use the parent process as a
+   # supervisor. This capability allows to run multiple processes alongside but saves some memory
+   # due to CoW.
+   module Swarm
+     class << self
+       # Raises an error if swarm is not supported on a given platform
+       def ensure_supported!
+         return if supported?
+
+         raise(
+           Errors::UnsupportedOptionError,
+           'Swarm mode not supported on this platform'
+         )
+       end
+
+       # @return [Boolean] true if fork API and pidfd OS API are available, otherwise false
+       def supported?
+         ::Process.respond_to?(:fork) && Swarm::Pidfd.supported?
+       end
+     end
+   end
+   end
+ end
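
The new namespace gates the swarm CLI mode (added alongside it in lib/karafka/cli/swarm.rb) behind two platform checks: a usable fork API plus the Linux pidfd API wrapped by Karafka::Swarm::Pidfd. A minimal sketch of guarding a deployment script with these methods; the fallback flow and script itself are assumptions for illustration, not gem behavior:

    # Illustrative guard (not gem code): only start swarm where it can work.
    require 'karafka'

    if Karafka::Swarm.supported?
      # A no-op here, since supported? already returned true; it raises
      # Karafka::Errors::UnsupportedOptionError on unsupported platforms
      Karafka::Swarm.ensure_supported!
      exec('bundle', 'exec', 'karafka', 'swarm')
    else
      # e.g. macOS (fork but no pidfd) or JRuby (no fork)
      exec('bundle', 'exec', 'karafka', 'server')
    end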
data/lib/karafka/templates/karafka.rb.erb CHANGED
@@ -73,5 +73,3 @@ end
  # Visit the setup documentation to get started and enhance your experience.
  #
  # https://karafka.io/docs/Web-UI-Getting-Started
- #
- # Karafka::Web.enable!
data/lib/karafka/version.rb CHANGED
@@ -3,5 +3,5 @@
  # Main module namespace
  module Karafka
    # Current Karafka version
-   VERSION = '2.3.0'
+   VERSION = '2.3.2'
  end
data/lib/karafka.rb CHANGED
@@ -41,7 +41,7 @@ module Karafka

      # @return [WaterDrop::Producer] waterdrop messages producer
      def producer
-       @producer ||= App.config.producer
+       App.config.producer
      end

      # @return [::Karafka::Monitor] monitor that we want to use
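
The producer change above removes module-level memoization, so Karafka.producer now always delegates to the current App.config.producer instead of caching the first producer it ever returned. A short sketch of the observable difference, assuming a standard WaterDrop producer swap (illustrative, not gem code):

    # Illustrative only: a producer assigned to the config is now always
    # visible through Karafka.producer, since no stale @producer copy is kept.
    custom = WaterDrop::Producer.new do |config|
      config.kafka = { 'bootstrap.servers': 'localhost:9092' }
    end

    Karafka::App.config.producer = custom

    Karafka.producer.equal?(custom) # => true (in 2.3.0 a cached producer could win)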
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: karafka
  version: !ruby/object:Gem::Version
-   version: 2.3.0
+   version: 2.3.2
  platform: ruby
  authors:
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
    AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
    msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
    -----END CERTIFICATE-----
- date: 2024-01-26 00:00:00.000000000 Z
+ date: 2024-02-16 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: karafka-core
@@ -162,6 +162,7 @@ files:
  - lib/karafka/cli/info.rb
  - lib/karafka/cli/install.rb
  - lib/karafka/cli/server.rb
+ - lib/karafka/cli/swarm.rb
  - lib/karafka/cli/topics.rb
  - lib/karafka/connection/client.rb
  - lib/karafka/connection/conductor.rb
@@ -186,6 +187,7 @@ files:
  - lib/karafka/errors.rb
  - lib/karafka/helpers/async.rb
  - lib/karafka/helpers/colorize.rb
+ - lib/karafka/helpers/config_importer.rb
  - lib/karafka/helpers/interval_runner.rb
  - lib/karafka/helpers/multi_delegator.rb
  - lib/karafka/instrumentation/assignments_tracker.rb
@@ -205,7 +207,9 @@ files:
  - lib/karafka/instrumentation/vendors/datadog/dashboard.json
  - lib/karafka/instrumentation/vendors/datadog/logger_listener.rb
  - lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb
+ - lib/karafka/instrumentation/vendors/kubernetes/base_listener.rb
  - lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb
+ - lib/karafka/instrumentation/vendors/kubernetes/swarm_liveness_listener.rb
  - lib/karafka/licenser.rb
  - lib/karafka/messages/batch_metadata.rb
  - lib/karafka/messages/builders/batch_metadata.rb
@@ -245,6 +249,9 @@ files:
  - lib/karafka/pro/loader.rb
  - lib/karafka/pro/processing/collapser.rb
  - lib/karafka/pro/processing/coordinator.rb
+ - lib/karafka/pro/processing/coordinators/errors_tracker.rb
+ - lib/karafka/pro/processing/coordinators/filters_applier.rb
+ - lib/karafka/pro/processing/coordinators/virtual_offset_manager.rb
  - lib/karafka/pro/processing/executor.rb
  - lib/karafka/pro/processing/expansions_selector.rb
  - lib/karafka/pro/processing/filters/base.rb
@@ -253,7 +260,6 @@ files:
  - lib/karafka/pro/processing/filters/inline_insights_delayer.rb
  - lib/karafka/pro/processing/filters/throttler.rb
  - lib/karafka/pro/processing/filters/virtual_limiter.rb
- - lib/karafka/pro/processing/filters_applier.rb
  - lib/karafka/pro/processing/jobs/consume_non_blocking.rb
  - lib/karafka/pro/processing/jobs/periodic.rb
  - lib/karafka/pro/processing/jobs/periodic_non_blocking.rb
@@ -317,12 +323,12 @@ files:
  - lib/karafka/pro/processing/strategies/mom/vp.rb
  - lib/karafka/pro/processing/strategies/vp/default.rb
  - lib/karafka/pro/processing/strategy_selector.rb
- - lib/karafka/pro/processing/virtual_offset_manager.rb
  - lib/karafka/pro/routing/features/active_job.rb
  - lib/karafka/pro/routing/features/active_job/builder.rb
  - lib/karafka/pro/routing/features/base.rb
  - lib/karafka/pro/routing/features/dead_letter_queue.rb
  - lib/karafka/pro/routing/features/dead_letter_queue/contracts/topic.rb
+ - lib/karafka/pro/routing/features/dead_letter_queue/topic.rb
  - lib/karafka/pro/routing/features/delaying.rb
  - lib/karafka/pro/routing/features/delaying/config.rb
  - lib/karafka/pro/routing/features/delaying/contracts/topic.rb
@@ -383,6 +389,7 @@ files:
  - lib/karafka/pro/routing/features/virtual_partitions/config.rb
  - lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb
  - lib/karafka/pro/routing/features/virtual_partitions/topic.rb
+ - lib/karafka/pro/swarm/liveness_listener.rb
  - lib/karafka/process.rb
  - lib/karafka/processing/coordinator.rb
  - lib/karafka/processing/coordinators_buffer.rb
@@ -455,6 +462,12 @@ files:
  - lib/karafka/setup/config.rb
  - lib/karafka/setup/dsl.rb
  - lib/karafka/status.rb
+ - lib/karafka/swarm.rb
+ - lib/karafka/swarm/liveness_listener.rb
+ - lib/karafka/swarm/manager.rb
+ - lib/karafka/swarm/node.rb
+ - lib/karafka/swarm/pidfd.rb
+ - lib/karafka/swarm/supervisor.rb
  - lib/karafka/templates/application_consumer.rb.erb
  - lib/karafka/templates/example_consumer.rb.erb
  - lib/karafka/templates/karafka.rb.erb
metadata.gz.sig CHANGED
Binary file
data/lib/karafka/pro/processing/filters_applier.rb DELETED
@@ -1,105 +0,0 @@
- # frozen_string_literal: true
-
- # This Karafka component is a Pro component under a commercial license.
- # This Karafka component is NOT licensed under LGPL.
- #
- # All of the commercial components are present in the lib/karafka/pro directory of this
- # repository and their usage requires commercial license agreement.
- #
- # Karafka has also commercial-friendly license, commercial support and commercial components.
- #
- # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
- # your code to Maciej Mensfeld.
-
- module Karafka
-   module Pro
-     module Processing
-       # Applier for all filters we want to have. Whether related to limiting messages based
-       # on the payload or any other things.
-       #
-       # From the outside world perspective, this encapsulates all the filters.
-       # This means that this is the API we expose as a single filter, allowing us to control
-       # the filtering via many filters easily.
-       class FiltersApplier
-         # @return [Array] registered filters array. Useful if we want to inject internal context
-         #   aware filters.
-         attr_reader :filters
-
-         # @param coordinator [Pro::Coordinator] pro coordinator
-         def initialize(coordinator)
-           # Builds filters out of their factories
-           # We build it that way (providing topic and partition) because there may be a case where
-           # someone wants to have a specific logic that is per topic or partition. Like for example
-           # a case where there is a cache bypassing revocations for topic partition.
-           #
-           # We provide full Karafka routing topic here and not the name only, in case the filter
-           # would be customized based on other topic settings (like VPs, etc)
-           #
-           # This setup allows for biggest flexibility also because topic object holds the reference
-           # to the subscription group and consumer group
-           @filters = coordinator.topic.filtering.factories.map do |factory|
-             factory.call(coordinator.topic, coordinator.partition)
-           end
-         end
-
-         # @param messages [Array<Karafka::Messages::Message>] array with messages from the
-         #   partition
-         def apply!(messages)
-           return unless active?
-
-           @filters.each { |filter| filter.apply!(messages) }
-         end
-
-         # @return [Boolean] did we filter out any messages during filtering run
-         def applied?
-           return false unless active?
-
-           !applied.empty?
-         end
-
-         # @return [Symbol] consumer post-filtering action that should be taken
-         def action
-           return :skip unless applied?
-
-           # The highest priority is on a potential backoff from any of the filters because it is
-           # the less risky (delay and continue later)
-           return :pause if applied.any? { |filter| filter.action == :pause }
-
-           # If none of the filters wanted to pause, we can check for any that would want to seek
-           # and if there is any, we can go with this strategy
-           return :seek if applied.any? { |filter| filter.action == :seek }
-
-           :skip
-         end
-
-         # @return [Integer] minimum timeout we need to pause. This is the minimum for all the
-         #   filters to satisfy all of them.
-         def timeout
-           applied.map(&:timeout).compact.min || 0
-         end
-
-         # The first message we do need to get next time we poll. We use the minimum not to jump
-         # accidentally by over any.
-         # @return [Karafka::Messages::Message, nil] cursor message or nil if none
-         # @note Cursor message can also return the offset in the time format
-         def cursor
-           return nil unless active?
-
-           applied.map(&:cursor).compact.min_by(&:offset)
-         end
-
-         private
-
-         # @return [Boolean] is filtering active
-         def active?
-           !@filters.empty?
-         end
-
-         # @return [Array<Object>] filters that applied any sort of messages limiting
-         def applied
-           @filters.select(&:applied?)
-         end
-       end
-     end
-   end
- end
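
Per the files list above, this class was moved rather than dropped: it reappears as lib/karafka/pro/processing/coordinators/filters_applier.rb. For orientation, the applier drives duck-typed filters that respond to apply!, applied?, action, timeout and cursor. A minimal sketch of such a filter built on the shipped Filters::Base; the class name and the size-limit logic are made up for illustration:

    # Hypothetical filter compatible with the applier's interface.
    # Filters::Base is assumed to supply applied?, action, timeout and cursor.
    class OversizedPayloadFilter < Karafka::Pro::Processing::Filters::Base
      def initialize(max_bytes)
        super()
        @max_bytes = max_bytes
      end

      # Mutates the batch in place, flagging that filtering happened
      def apply!(messages)
        @applied = false

        messages.delete_if do |message|
          next false unless message.raw_payload.bytesize > @max_bytes

          # Returning true both removes the message and records the fact
          @applied = true
        end
      end
    end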
data/lib/karafka/pro/processing/virtual_offset_manager.rb DELETED
@@ -1,177 +0,0 @@
- # frozen_string_literal: true
-
- # This Karafka component is a Pro component under a commercial license.
- # This Karafka component is NOT licensed under LGPL.
- #
- # All of the commercial components are present in the lib/karafka/pro directory of this
- # repository and their usage requires commercial license agreement.
- #
- # Karafka has also commercial-friendly license, commercial support and commercial components.
- #
- # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
- # your code to Maciej Mensfeld.
-
- module Karafka
-   module Pro
-     module Processing
-       # Manager that keeps track of our offsets with the virtualization layer that are local
-       # to given partition assignment. It allows for easier offset management for virtual
-       # virtual partition cases as it provides us ability to mark as consumed and move the
-       # real offset behind as expected.
-       #
-       # @note We still use the regular coordinator "real" offset management as we want to have
-       #   them as separated as possible because the real seek offset management is also used for
-       #   pausing, filtering and others and should not be impacted by the virtual one
-       #
-       # @note This manager is **not** thread-safe by itself. It should operate from coordinator
-       #   locked locations.
-       class VirtualOffsetManager
-         attr_reader :groups
-
-         # @param topic [String]
-         # @param partition [Integer]
-         # @param offset_metadata_strategy [Symbol] what metadata should we select. That is, should
-         #   we use the most recent or one picked from the offset that is going to be committed
-         #
-         # @note We need topic and partition because we use a seek message (virtual) for real offset
-         #   management. We could keep real message reference but this can be memory consuming
-         #   and not worth it.
-         def initialize(topic, partition, offset_metadata_strategy)
-           @topic = topic
-           @partition = partition
-           @groups = []
-           @marked = {}
-           @offsets_metadata = {}
-           @real_offset = -1
-           @offset_metadata_strategy = offset_metadata_strategy
-           @current_offset_metadata = nil
-         end
-
-         # Clears the manager for a next collective operation
-         def clear
-           @groups.clear
-           @offsets_metadata.clear
-           @current_offset_metadata = nil
-           @marked.clear
-           @real_offset = -1
-         end
-
-         # Registers an offset group coming from one virtual consumer. In order to move the real
-         # underlying offset accordingly, we need to make sure to track the virtual consumers
-         # offsets groups independently and only materialize the end result.
-         #
-         # @param offsets_group [Array<Integer>] offsets from one virtual consumer
-         def register(offsets_group)
-           @groups << offsets_group
-
-           offsets_group.each { |offset| @marked[offset] = false }
-         end
-
-         # Marks given message as marked (virtually consumed).
-         # We mark given message offset and other earlier offsets from the same group as done
-         # and we can refresh our real offset representation based on that as it might have changed
-         # to a newer real offset.
-         # @param message [Karafka::Messages::Message] message coming from VP we want to mark
-         # @param offset_metadata [String, nil] offset metadata. `nil` if none
-         def mark(message, offset_metadata)
-           offset = message.offset
-
-           # Store metadata when we materialize the most stable offset
-           @offsets_metadata[offset] = offset_metadata
-           @current_offset_metadata = offset_metadata
-
-           group = @groups.find { |reg_group| reg_group.include?(offset) }
-
-           # This case can happen when someone uses MoM and wants to mark message from a previous
-           # batch as consumed. We can add it, since the real offset refresh will point to it
-           unless group
-             group = [offset]
-             @groups << group
-           end
-
-           position = group.index(offset)
-
-           # Mark all previous messages from the same group also as virtually consumed
-           group[0..position].each do |markable_offset|
-             # Set previous messages metadata offset as the offset of higher one for overwrites
-             # unless a different metadata were set explicitely
-             @offsets_metadata[markable_offset] ||= offset_metadata
-             @marked[markable_offset] = true
-           end
-
-           # Recompute the real offset representation
-           materialize_real_offset
-         end
-
-         # Mark all from all groups including the `message`.
-         # Useful when operating in a collapsed state for marking
-         # @param message [Karafka::Messages::Message]
-         # @param offset_metadata [String, nil]
-         def mark_until(message, offset_metadata)
-           mark(message, offset_metadata)
-
-           @groups.each do |group|
-             group.each do |offset|
-               next if offset > message.offset
-
-               @offsets_metadata[offset] = offset_metadata
-               @marked[offset] = true
-             end
-           end
-
-           materialize_real_offset
-         end
-
-         # @return [Array<Integer>] Offsets of messages already marked as consumed virtually
-         def marked
-           @marked.select { |_, status| status }.map(&:first).sort
-         end
-
-         # Is there a real offset we can mark as consumed
-         # @return [Boolean]
-         def markable?
-           !@real_offset.negative?
-         end
-
-         # @return [Array<Messages::Seek, String>] markable message for real offset marking and
-         #   its associated metadata
-         def markable
-           raise Errors::InvalidRealOffsetUsageError unless markable?
-
-           offset_metadata = case @offset_metadata_strategy
-                             when :exact
-                               @offsets_metadata.fetch(@real_offset)
-                             when :current
-                               @current_offset_metadata
-                             else
-                               raise Errors::UnsupportedCaseError, @offset_metadata_strategy
-                             end
-
-           [
-             Messages::Seek.new(
-               @topic,
-               @partition,
-               @real_offset
-             ),
-             offset_metadata
-           ]
-         end
-
-         private
-
-         # Recomputes the biggest possible real offset we can have.
-         # It picks the the biggest offset that has uninterrupted stream of virtually marked as
-         # consumed because this will be the collective offset.
-         def materialize_real_offset
-           @marked.to_a.sort_by(&:first).each do |offset, marked|
-             break unless marked
-
-             @real_offset = offset
-           end
-
-           @real_offset = (@marked.keys.min - 1) if @real_offset.negative?
-         end
-       end
-     end
-   end
- end
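
This manager likewise moved to lib/karafka/pro/processing/coordinators/virtual_offset_manager.rb rather than disappearing. The core idea behind materialize_real_offset is worth restating: the committable "real" offset is the end of the longest gap-free prefix of offsets marked as virtually consumed. A standalone sketch of that invariant, with illustrative names (not gem code):

    # Given registered offsets mapped to their marked-as-consumed state,
    # return the highest offset whose predecessors are all marked.
    def committable_offset(marked)
      committable = -1

      marked.sort_by(&:first).each do |offset, done|
        # Stop at the first gap: later marks cannot be committed yet
        break unless done

        committable = offset
      end

      committable
    end

    # Offset 2 is registered but unmarked, so the run stops at 1:
    committable_offset(0 => true, 1 => true, 2 => false, 3 => true) # => 1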