karafka 2.4.18 → 2.5.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/CODEOWNERS +3 -0
- data/.github/workflows/ci.yml +58 -14
- data/.github/workflows/verify-action-pins.yml +16 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +53 -0
- data/Gemfile +3 -3
- data/Gemfile.lock +55 -58
- data/LICENSE-COMM +2 -2
- data/bin/clean_kafka +43 -0
- data/bin/integrations +17 -5
- data/bin/rspecs +15 -3
- data/bin/verify_kafka_warnings +35 -0
- data/bin/verify_topics_naming +27 -0
- data/config/locales/errors.yml +3 -0
- data/config/locales/pro_errors.yml +13 -2
- data/docker-compose.yml +1 -1
- data/examples/payloads/json/enrollment_event.json +579 -0
- data/examples/payloads/json/ingestion_event.json +30 -0
- data/examples/payloads/json/transaction_event.json +17 -0
- data/examples/payloads/json/user_event.json +11 -0
- data/karafka.gemspec +3 -3
- data/lib/karafka/active_job/current_attributes.rb +1 -1
- data/lib/karafka/admin/acl.rb +5 -1
- data/lib/karafka/admin.rb +51 -19
- data/lib/karafka/base_consumer.rb +17 -8
- data/lib/karafka/cli/base.rb +8 -2
- data/lib/karafka/connection/client.rb +20 -7
- data/lib/karafka/connection/listener.rb +24 -12
- data/lib/karafka/connection/messages_buffer.rb +1 -1
- data/lib/karafka/connection/proxy.rb +3 -0
- data/lib/karafka/contracts/config.rb +3 -0
- data/lib/karafka/contracts/topic.rb +1 -1
- data/lib/karafka/errors.rb +11 -0
- data/lib/karafka/helpers/async.rb +3 -1
- data/lib/karafka/instrumentation/callbacks/rebalance.rb +5 -1
- data/lib/karafka/instrumentation/logger_listener.rb +86 -23
- data/lib/karafka/instrumentation/proctitle_listener.rb +5 -1
- data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +2 -2
- data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
- data/lib/karafka/pro/cleaner.rb +8 -0
- data/lib/karafka/pro/cli/parallel_segments/base.rb +89 -0
- data/lib/karafka/pro/cli/parallel_segments/collapse.rb +164 -0
- data/lib/karafka/pro/cli/parallel_segments/distribute.rb +164 -0
- data/lib/karafka/pro/cli/parallel_segments.rb +60 -0
- data/lib/karafka/pro/connection/manager.rb +5 -8
- data/lib/karafka/pro/encryption.rb +8 -0
- data/lib/karafka/pro/instrumentation/performance_tracker.rb +1 -1
- data/lib/karafka/pro/iterator/expander.rb +5 -3
- data/lib/karafka/pro/iterator/tpl_builder.rb +23 -0
- data/lib/karafka/pro/loader.rb +10 -0
- data/lib/karafka/pro/processing/coordinator.rb +4 -1
- data/lib/karafka/pro/processing/coordinators/errors_tracker.rb +27 -3
- data/lib/karafka/pro/processing/coordinators/filters_applier.rb +11 -0
- data/lib/karafka/pro/processing/filters/base.rb +10 -2
- data/lib/karafka/pro/processing/filters/expirer.rb +5 -0
- data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb +2 -2
- data/lib/karafka/pro/processing/filters/virtual_limiter.rb +5 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/base.rb +73 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/default.rb +85 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/mom.rb +66 -0
- data/lib/karafka/pro/processing/partitioner.rb +1 -13
- data/lib/karafka/pro/processing/piping/consumer.rb +13 -13
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/default.rb +36 -8
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +14 -10
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +3 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/ftr/default.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +4 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
- data/lib/karafka/pro/processing/virtual_partitions/distributors/balanced.rb +50 -0
- data/lib/karafka/pro/processing/virtual_partitions/distributors/base.rb +29 -0
- data/lib/karafka/pro/processing/virtual_partitions/distributors/consistent.rb +27 -0
- data/lib/karafka/pro/recurring_tasks/contracts/config.rb +8 -4
- data/lib/karafka/pro/recurring_tasks/dispatcher.rb +3 -3
- data/lib/karafka/pro/recurring_tasks/setup/config.rb +7 -2
- data/lib/karafka/pro/recurring_tasks.rb +13 -0
- data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +1 -1
- data/lib/karafka/pro/routing/features/multiplexing/config.rb +1 -0
- data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +17 -0
- data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +5 -2
- data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +8 -1
- data/lib/karafka/pro/routing/features/parallel_segments/builder.rb +47 -0
- data/lib/karafka/pro/routing/features/parallel_segments/config.rb +27 -0
- data/lib/karafka/pro/routing/features/parallel_segments/consumer_group.rb +83 -0
- data/lib/karafka/pro/routing/features/parallel_segments/contracts/consumer_group.rb +49 -0
- data/lib/karafka/pro/routing/features/parallel_segments/topic.rb +43 -0
- data/lib/karafka/pro/routing/features/parallel_segments.rb +24 -0
- data/lib/karafka/pro/routing/features/patterns/pattern.rb +1 -1
- data/lib/karafka/pro/routing/features/recurring_tasks/builder.rb +2 -2
- data/lib/karafka/pro/routing/features/scheduled_messages/builder.rb +10 -6
- data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +20 -2
- data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +8 -2
- data/lib/karafka/pro/scheduled_messages/consumer.rb +14 -15
- data/lib/karafka/pro/scheduled_messages/daily_buffer.rb +9 -6
- data/lib/karafka/pro/scheduled_messages/deserializers/headers.rb +7 -1
- data/lib/karafka/pro/scheduled_messages/max_epoch.rb +15 -6
- data/lib/karafka/pro/scheduled_messages.rb +13 -0
- data/lib/karafka/processing/coordinators_buffer.rb +1 -0
- data/lib/karafka/processing/strategies/default.rb +4 -4
- data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
- data/lib/karafka/routing/subscription_group.rb +1 -1
- data/lib/karafka/runner.rb +7 -1
- data/lib/karafka/server.rb +5 -0
- data/lib/karafka/setup/attributes_map.rb +2 -0
- data/lib/karafka/setup/config.rb +22 -1
- data/lib/karafka/setup/defaults_injector.rb +26 -1
- data/lib/karafka/status.rb +6 -1
- data/lib/karafka/swarm/node.rb +31 -0
- data/lib/karafka/swarm/supervisor.rb +4 -0
- data/lib/karafka/templates/karafka.rb.erb +14 -1
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +17 -9
- data/renovate.json +14 -2
- data.tar.gz.sig +0 -0
- metadata +36 -11
- metadata.gz.sig +0 -0
@@ -20,16 +20,16 @@ module Karafka
|
|
20
20
|
|
21
21
|
# Pipes given message to the provided topic with expected details. Useful for
|
22
22
|
# pass-through operations where deserialization is not needed. Upon usage it will include
|
23
|
-
# all the
|
23
|
+
# all the source headers + meta headers about the source of message.
|
24
24
|
#
|
25
25
|
# @param topic [String, Symbol] where we want to send the message
|
26
|
-
# @param message [Karafka::Messages::Message]
|
26
|
+
# @param message [Karafka::Messages::Message] source message to pipe
|
27
27
|
#
|
28
28
|
# @note It will NOT deserialize the payload so it is fast
|
29
29
|
#
|
30
30
|
# @note We assume that there can be different number of partitions in the target topic,
|
31
|
-
# this is why we use `key` based on the
|
32
|
-
# This will not utilize partitions beyond the number of partitions of
|
31
|
+
# this is why we use `key` based on the source topic key and not the partition id.
|
32
|
+
# This will not utilize partitions beyond the number of partitions of source topic,
|
33
33
|
# but will accommodate for topics with fewer partitions.
|
34
34
|
def pipe_async(topic:, message:)
|
35
35
|
produce_async(
|
@@ -40,7 +40,7 @@ module Karafka
|
|
40
40
|
# Sync version of pipe for one message
|
41
41
|
#
|
42
42
|
# @param topic [String, Symbol] where we want to send the message
|
43
|
-
# @param message [Karafka::Messages::Message]
|
43
|
+
# @param message [Karafka::Messages::Message] source message to pipe
|
44
44
|
# @see [#pipe_async]
|
45
45
|
def pipe_sync(topic:, message:)
|
46
46
|
produce_sync(
|
@@ -51,7 +51,7 @@ module Karafka
|
|
51
51
|
# Async multi-message pipe
|
52
52
|
#
|
53
53
|
# @param topic [String, Symbol] where we want to send the message
|
54
|
-
# @param messages [Array<Karafka::Messages::Message>]
|
54
|
+
# @param messages [Array<Karafka::Messages::Message>] source messages to pipe
|
55
55
|
#
|
56
56
|
# @note If transactional producer in use and dispatch is not wrapped with a transaction,
|
57
57
|
# it will automatically wrap the dispatch with a transaction
|
@@ -66,7 +66,7 @@ module Karafka
|
|
66
66
|
# Sync multi-message pipe
|
67
67
|
#
|
68
68
|
# @param topic [String, Symbol] where we want to send the message
|
69
|
-
# @param messages [Array<Karafka::Messages::Message>]
|
69
|
+
# @param messages [Array<Karafka::Messages::Message>] source messages to pipe
|
70
70
|
#
|
71
71
|
# @note If transactional producer in use and dispatch is not wrapped with a transaction,
|
72
72
|
# it will automatically wrap the dispatch with a transaction
|
@@ -81,7 +81,7 @@ module Karafka
|
|
81
81
|
private
|
82
82
|
|
83
83
|
# @param topic [String, Symbol] where we want to send the message
|
84
|
-
# @param message [Karafka::Messages::Message]
|
84
|
+
# @param message [Karafka::Messages::Message] source message to pipe
|
85
85
|
# @return [Hash] hash with message to pipe.
|
86
86
|
#
|
87
87
|
# @note If you need to alter this, please define the `#enhance_pipe_message` method
|
@@ -90,17 +90,17 @@ module Karafka
|
|
90
90
|
topic: topic,
|
91
91
|
payload: message.raw_payload,
|
92
92
|
headers: message.raw_headers.merge(
|
93
|
-
'
|
94
|
-
'
|
95
|
-
'
|
96
|
-
'
|
93
|
+
'source_topic' => message.topic,
|
94
|
+
'source_partition' => message.partition.to_s,
|
95
|
+
'source_offset' => message.offset.to_s,
|
96
|
+
'source_consumer_group' => self.topic.consumer_group.id
|
97
97
|
)
|
98
98
|
}
|
99
99
|
|
100
100
|
# Use a key only if key was provided
|
101
101
|
if message.raw_key
|
102
102
|
pipe_message[:key] = message.raw_key
|
103
|
-
# Otherwise pipe creating a key that will assign it based on the
|
103
|
+
# Otherwise pipe creating a key that will assign it based on the source partition
|
104
104
|
# number
|
105
105
|
else
|
106
106
|
pipe_message[:key] = message.partition.to_s
|
@@ -40,7 +40,7 @@ module Karafka
|
|
40
40
|
mark_as_consumed(last_group_message) unless revoked?
|
41
41
|
# no need to check for manual seek because AJ consumer is internal and
|
42
42
|
# fully controlled by us
|
43
|
-
seek(seek_offset, false) unless revoked?
|
43
|
+
seek(seek_offset, false, reset_offset: false) unless revoked?
|
44
44
|
|
45
45
|
resume
|
46
46
|
else
|
@@ -55,14 +55,19 @@ module Karafka
|
|
55
55
|
# seek offset can be nil only in case `#seek` was invoked with offset reset request
|
56
56
|
# In case like this we ignore marking
|
57
57
|
return true if seek_offset.nil?
|
58
|
-
# Ignore
|
59
|
-
|
58
|
+
# Ignore if it is the same offset as the one that is marked currently
|
59
|
+
# We ignore second marking because it changes nothing and in case of people using
|
60
|
+
# metadata storage but with automatic offset marking, this would cause metadata to be
|
61
|
+
# erased by automatic marking
|
62
|
+
return true if (seek_offset - 1) == message.offset
|
60
63
|
return false if revoked?
|
61
64
|
|
62
65
|
# If we are not inside a transaction but this is a transactional topic, we mark with
|
63
66
|
# artificially created transaction
|
64
67
|
stored = if producer.transactional?
|
65
68
|
mark_with_transaction(message, offset_metadata, true)
|
69
|
+
elsif @_transactional_marking
|
70
|
+
raise Errors::NonTransactionalMarkingAttemptError
|
66
71
|
else
|
67
72
|
client.mark_as_consumed(message, offset_metadata)
|
68
73
|
end
|
@@ -92,14 +97,19 @@ module Karafka
|
|
92
97
|
# seek offset can be nil only in case `#seek` was invoked with offset reset request
|
93
98
|
# In case like this we ignore marking
|
94
99
|
return true if seek_offset.nil?
|
95
|
-
# Ignore
|
96
|
-
|
100
|
+
# Ignore if it is the same offset as the one that is marked currently
|
101
|
+
# We ignore second marking because it changes nothing and in case of people using
|
102
|
+
# metadata storage but with automatic offset marking, this would cause metadata to be
|
103
|
+
# erased by automatic marking
|
104
|
+
return true if (seek_offset - 1) == message.offset
|
97
105
|
return false if revoked?
|
98
106
|
|
99
107
|
# If we are not inside a transaction but this is a transactional topic, we mark with
|
100
108
|
# artificially created transaction
|
101
109
|
stored = if producer.transactional?
|
102
110
|
mark_with_transaction(message, offset_metadata, false)
|
111
|
+
elsif @_transactional_marking
|
112
|
+
raise Errors::NonTransactionalMarkingAttemptError
|
103
113
|
else
|
104
114
|
client.mark_as_consumed!(message, offset_metadata)
|
105
115
|
end
|
@@ -143,6 +153,7 @@ module Karafka
|
|
143
153
|
self.producer = active_producer
|
144
154
|
|
145
155
|
transaction_started = false
|
156
|
+
transaction_completed = false
|
146
157
|
|
147
158
|
# Prevent from nested transactions. It would not make any sense
|
148
159
|
raise Errors::TransactionAlreadyInitializedError if @_in_transaction
|
@@ -159,6 +170,12 @@ module Karafka
|
|
159
170
|
# transaction. We do it only for transactions that contain offset management as for
|
160
171
|
# producer only, this is not relevant.
|
161
172
|
raise Errors::AssignmentLostError if @_in_transaction_marked && revoked?
|
173
|
+
|
174
|
+
# If we do not reach this, we should not move seek offsets because it means that
|
175
|
+
# either an error occurred or transaction was aborted.
|
176
|
+
# In case of error, it will bubble up so no issue but in case of abort, while we
|
177
|
+
# do not reach this place, the code will continue
|
178
|
+
transaction_completed = true
|
162
179
|
end
|
163
180
|
|
164
181
|
@_in_transaction = false
|
@@ -180,8 +197,13 @@ module Karafka
|
|
180
197
|
# to mimic this
|
181
198
|
# - Complex strategies like VPs can use this in VPs to mark in parallel without
|
182
199
|
# having to redefine the transactional flow completely
|
183
|
-
|
184
|
-
|
200
|
+
#
|
201
|
+
# @note This should be applied only if transaction did not error and if it was not
|
202
|
+
# aborted.
|
203
|
+
if transaction_completed
|
204
|
+
@_transaction_marked.each do |marking|
|
205
|
+
marking.pop ? mark_as_consumed(*marking) : mark_as_consumed!(*marking)
|
206
|
+
end
|
185
207
|
end
|
186
208
|
|
187
209
|
true
|
@@ -213,6 +235,9 @@ module Karafka
|
|
213
235
|
offset_metadata
|
214
236
|
)
|
215
237
|
|
238
|
+
# This one is long lived and used to make sure, that users do not mix transactional
|
239
|
+
# marking with non-transactional. When this happens we should raise error
|
240
|
+
@_transactional_marking = true
|
216
241
|
@_in_transaction_marked = true
|
217
242
|
@_transaction_marked ||= []
|
218
243
|
@_transaction_marked << [message, offset_metadata, async]
|
@@ -252,8 +277,11 @@ module Karafka
|
|
252
277
|
# seek offset can be nil only in case `#seek` was invoked with offset reset request
|
253
278
|
# In case like this we ignore marking
|
254
279
|
return true if seek_offset.nil?
|
255
|
-
# Ignore
|
256
|
-
|
280
|
+
# Ignore if it is the same offset as the one that is marked currently
|
281
|
+
# We ignore second marking because it changes nothing and in case of people using
|
282
|
+
# metadata storage but with automatic offset marking, this would cause metadata to be
|
283
|
+
# erased by automatic marking
|
284
|
+
return true if (seek_offset - 1) == message.offset
|
257
285
|
return false if revoked?
|
258
286
|
|
259
287
|
# If we have already marked this successfully in a transaction that was running
|
@@ -145,19 +145,19 @@ module Karafka
|
|
145
145
|
# @param skippable_message [Array<Karafka::Messages::Message>]
|
146
146
|
# @return [Hash] dispatch DLQ message
|
147
147
|
def build_dlq_message(skippable_message)
|
148
|
-
|
148
|
+
source_partition = skippable_message.partition.to_s
|
149
149
|
|
150
150
|
dlq_message = {
|
151
|
-
topic: topic.dead_letter_queue.topic,
|
152
|
-
key:
|
151
|
+
topic: @_dispatch_to_dlq_topic || topic.dead_letter_queue.topic,
|
152
|
+
key: source_partition,
|
153
153
|
payload: skippable_message.raw_payload,
|
154
154
|
headers: skippable_message.raw_headers.merge(
|
155
|
-
'
|
156
|
-
'
|
157
|
-
'
|
158
|
-
'
|
159
|
-
'
|
160
|
-
'
|
155
|
+
'source_topic' => topic.name,
|
156
|
+
'source_partition' => source_partition,
|
157
|
+
'source_offset' => skippable_message.offset.to_s,
|
158
|
+
'source_consumer_group' => topic.consumer_group.id,
|
159
|
+
'source_key' => skippable_message.raw_key.to_s,
|
160
|
+
'source_attempts' => attempt.to_s
|
161
161
|
)
|
162
162
|
}
|
163
163
|
|
@@ -205,7 +205,7 @@ module Karafka
|
|
205
205
|
# In case of `:skip` and `:dispatch` will run the exact flow provided in a block
|
206
206
|
# In case of `:retry` always `#retry_after_pause` is applied
|
207
207
|
def apply_dlq_flow
|
208
|
-
flow = topic.dead_letter_queue.strategy.call(errors_tracker, attempt)
|
208
|
+
flow, target_topic = topic.dead_letter_queue.strategy.call(errors_tracker, attempt)
|
209
209
|
|
210
210
|
case flow
|
211
211
|
when :retry
|
@@ -216,6 +216,8 @@ module Karafka
|
|
216
216
|
@_dispatch_to_dlq = false
|
217
217
|
when :dispatch
|
218
218
|
@_dispatch_to_dlq = true
|
219
|
+
# Use custom topic if it was returned from the strategy
|
220
|
+
@_dispatch_to_dlq_topic = target_topic || topic.dead_letter_queue.topic
|
219
221
|
else
|
220
222
|
raise Karafka::UnsupportedCaseError, flow
|
221
223
|
end
|
@@ -227,6 +229,8 @@ module Karafka
|
|
227
229
|
|
228
230
|
# Always backoff after DLQ dispatch even on skip to prevent overloads on errors
|
229
231
|
pause(seek_offset, nil, false)
|
232
|
+
ensure
|
233
|
+
@_dispatch_to_dlq_topic = nil
|
230
234
|
end
|
231
235
|
|
232
236
|
# Marks message that went to DLQ (if applicable) based on the requested method
|
@@ -35,7 +35,7 @@ module Karafka
|
|
35
35
|
if coordinator.filtered? && !revoked?
|
36
36
|
handle_post_filtering
|
37
37
|
elsif !revoked? && !coordinator.manual_seek?
|
38
|
-
seek(last_group_message.offset + 1, false)
|
38
|
+
seek(last_group_message.offset + 1, false, reset_offset: false)
|
39
39
|
resume
|
40
40
|
else
|
41
41
|
resume
|
@@ -31,7 +31,9 @@ module Karafka
|
|
31
31
|
|
32
32
|
mark_as_consumed(last_group_message) unless revoked?
|
33
33
|
# We should not overwrite user manual seek request with our seek
|
34
|
-
|
34
|
+
unless revoked? || coordinator.manual_seek?
|
35
|
+
seek(seek_offset, false, reset_offset: false)
|
36
|
+
end
|
35
37
|
|
36
38
|
resume
|
37
39
|
else
|
@@ -43,7 +43,10 @@ module Karafka
|
|
43
43
|
return if coordinator.manual_pause?
|
44
44
|
|
45
45
|
mark_as_consumed(last_group_message) unless revoked?
|
46
|
-
|
46
|
+
|
47
|
+
unless revoked? || coordinator.manual_seek?
|
48
|
+
seek(seek_offset, false, reset_offset: false)
|
49
|
+
end
|
47
50
|
|
48
51
|
resume
|
49
52
|
else
|
@@ -38,7 +38,7 @@ module Karafka
|
|
38
38
|
elsif !revoked? && !coordinator.manual_seek?
|
39
39
|
# If not revoked and not throttled, we move to where we were supposed to and
|
40
40
|
# resume
|
41
|
-
seek(last_group_message.offset + 1, false)
|
41
|
+
seek(last_group_message.offset + 1, false, reset_offset: false)
|
42
42
|
resume
|
43
43
|
else
|
44
44
|
resume
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
|
4
|
+
# See LICENSE for details.
|
5
|
+
|
6
|
+
module Karafka
|
7
|
+
module Pro
|
8
|
+
module Processing
|
9
|
+
module VirtualPartitions
|
10
|
+
module Distributors
|
11
|
+
# Balanced distributor that groups messages by partition key
|
12
|
+
# and processes larger groups first while maintaining message order within groups
|
13
|
+
class Balanced < Base
|
14
|
+
# @param messages [Array<Karafka::Messages::Message>] messages to distribute
|
15
|
+
# @return [Hash<Integer, Array<Karafka::Messages::Message>>] hash with group ids as
|
16
|
+
# keys and message groups as values
|
17
|
+
def call(messages)
|
18
|
+
# Group messages by partition key
|
19
|
+
key_groupings = messages.group_by { |msg| config.partitioner.call(msg) }
|
20
|
+
|
21
|
+
worker_loads = Array.new(config.max_partitions, 0)
|
22
|
+
worker_assignments = Array.new(config.max_partitions) { [] }
|
23
|
+
|
24
|
+
# Sort keys by workload in descending order
|
25
|
+
sorted_keys = key_groupings.keys.sort_by { |key| -key_groupings[key].size }
|
26
|
+
|
27
|
+
# Assign each key to the worker with the least current load
|
28
|
+
sorted_keys.each do |key|
|
29
|
+
# Find worker with minimum current load
|
30
|
+
min_load_worker = worker_loads.each_with_index.min_by { |load, _| load }[1]
|
31
|
+
messages = key_groupings[key]
|
32
|
+
|
33
|
+
# Assign this key to that worker
|
34
|
+
worker_assignments[min_load_worker] += messages
|
35
|
+
worker_loads[min_load_worker] += messages.size
|
36
|
+
end
|
37
|
+
|
38
|
+
# Combine messages for each worker and sort by offset
|
39
|
+
worker_assignments
|
40
|
+
.each_with_index
|
41
|
+
.reject { |group_messages, _| group_messages.empty? }
|
42
|
+
.map! { |group_messages, index| [index, group_messages.sort_by!(&:offset)] }
|
43
|
+
.to_h
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
|
4
|
+
# See LICENSE for details.
|
5
|
+
|
6
|
+
module Karafka
|
7
|
+
module Pro
|
8
|
+
module Processing
|
9
|
+
# Processing components for virtual partitions
|
10
|
+
module VirtualPartitions
|
11
|
+
# Distributors for virtual partitions
|
12
|
+
module Distributors
|
13
|
+
# Base class for all virtual partition distributors
|
14
|
+
class Base
|
15
|
+
# @param config [Karafka::Pro::Routing::Features::VirtualPartitions::Config]
|
16
|
+
def initialize(config)
|
17
|
+
@config = config
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
# @return [Karafka::Pro::Routing::Features::VirtualPartitions::Config]
|
23
|
+
attr_reader :config
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
|
4
|
+
# See LICENSE for details.
|
5
|
+
|
6
|
+
module Karafka
|
7
|
+
module Pro
|
8
|
+
module Processing
|
9
|
+
module VirtualPartitions
|
10
|
+
module Distributors
|
11
|
+
# Consistent distributor that ensures messages with the same partition key
|
12
|
+
# are always processed in the same virtual partition
|
13
|
+
class Consistent < Base
|
14
|
+
# @param messages [Array<Karafka::Messages::Message>] messages to distribute
|
15
|
+
# @return [Hash<Integer, Array<Karafka::Messages::Message>>] hash with group ids as
|
16
|
+
# keys and message groups as values
|
17
|
+
def call(messages)
|
18
|
+
messages
|
19
|
+
.group_by { |msg| config.reducer.call(config.partitioner.call(msg)) }
|
20
|
+
.to_h
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -29,12 +29,16 @@ module Karafka
|
|
29
29
|
end
|
30
30
|
|
31
31
|
nested(:topics) do
|
32
|
-
|
33
|
-
|
32
|
+
nested(:schedules) do
|
33
|
+
required(:name) do |val|
|
34
|
+
val.is_a?(String) && Karafka::Contracts::TOPIC_REGEXP.match?(val)
|
35
|
+
end
|
34
36
|
end
|
35
37
|
|
36
|
-
|
37
|
-
|
38
|
+
nested(:logs) do
|
39
|
+
required(:name) do |val|
|
40
|
+
val.is_a?(String) && Karafka::Contracts::TOPIC_REGEXP.match?(val)
|
41
|
+
end
|
38
42
|
end
|
39
43
|
end
|
40
44
|
end
|
@@ -12,7 +12,7 @@ module Karafka
|
|
12
12
|
# Snapshots to Kafka current schedule state
|
13
13
|
def schedule
|
14
14
|
produce(
|
15
|
-
topics.schedules,
|
15
|
+
topics.schedules.name,
|
16
16
|
'state:schedule',
|
17
17
|
serializer.schedule(::Karafka::Pro::RecurringTasks.schedule)
|
18
18
|
)
|
@@ -25,7 +25,7 @@ module Karafka
|
|
25
25
|
# because in the web ui we work with the full name and it is easier. Since
|
26
26
|
def command(name, task_id)
|
27
27
|
produce(
|
28
|
-
topics.schedules,
|
28
|
+
topics.schedules.name,
|
29
29
|
"command:#{name}:#{task_id}",
|
30
30
|
serializer.command(name, task_id)
|
31
31
|
)
|
@@ -35,7 +35,7 @@ module Karafka
|
|
35
35
|
# @param event [Karafka::Core::Monitoring::Event]
|
36
36
|
def log(event)
|
37
37
|
produce(
|
38
|
-
topics.logs,
|
38
|
+
topics.logs.name,
|
39
39
|
event[:task].id,
|
40
40
|
serializer.log(event)
|
41
41
|
)
|
@@ -32,8 +32,13 @@ module Karafka
|
|
32
32
|
)
|
33
33
|
|
34
34
|
setting(:topics) do
|
35
|
-
setting(:schedules
|
36
|
-
|
35
|
+
setting(:schedules) do
|
36
|
+
setting(:name, default: 'karafka_recurring_tasks_schedules')
|
37
|
+
end
|
38
|
+
|
39
|
+
setting(:logs) do
|
40
|
+
setting(:name, default: 'karafka_recurring_tasks_logs')
|
41
|
+
end
|
37
42
|
end
|
38
43
|
|
39
44
|
configure
|
@@ -73,6 +73,19 @@ module Karafka
|
|
73
73
|
|
74
74
|
Karafka.monitor.subscribe(Listener.new)
|
75
75
|
end
|
76
|
+
|
77
|
+
# Basically since we may have custom producers configured that are not the same as the
|
78
|
+
# default one, we hold a reference to old pre-fork producer. This means, that when we
|
79
|
+
# initialize it again in post-fork, as long as user uses defaults we should re-inherit
|
80
|
+
# it from the default config.
|
81
|
+
#
|
82
|
+
# @param config [Karafka::Core::Configurable::Node]
|
83
|
+
# @param pre_fork_producer [WaterDrop::Producer]
|
84
|
+
def post_fork(config, pre_fork_producer)
|
85
|
+
return unless config.recurring_tasks.producer == pre_fork_producer
|
86
|
+
|
87
|
+
config.recurring_tasks.producer = config.producer
|
88
|
+
end
|
76
89
|
end
|
77
90
|
end
|
78
91
|
end
|
@@ -12,7 +12,7 @@ module Karafka
|
|
12
12
|
module Topic
|
13
13
|
# @param strategy [#call, nil] Strategy we want to use or nil if a default strategy
|
14
14
|
# (same as in OSS) should be applied
|
15
|
-
# @param args [Hash]
|
15
|
+
# @param args [Hash] Pro DLQ arguments
|
16
16
|
def dead_letter_queue(strategy: nil, **args)
|
17
17
|
return @dead_letter_queue if @dead_letter_queue
|
18
18
|
|
@@ -28,6 +28,7 @@ module Karafka
|
|
28
28
|
optional(:multiplexing_min) { |val| val.is_a?(Integer) && val >= 1 }
|
29
29
|
optional(:multiplexing_max) { |val| val.is_a?(Integer) && val >= 1 }
|
30
30
|
optional(:multiplexing_boot) { |val| val.is_a?(Integer) && val >= 1 }
|
31
|
+
optional(:multiplexing_scale_delay) { |val| val.is_a?(Integer) && val >= 1_000 }
|
31
32
|
end
|
32
33
|
|
33
34
|
# Makes sure min is not more than max
|
@@ -78,6 +79,22 @@ module Karafka
|
|
78
79
|
[[%w[subscription_group_details], :multiplexing_boot_not_dynamic]]
|
79
80
|
end
|
80
81
|
|
82
|
+
# Makes sure we do not run multiplexing with 1 always which does not make much sense
|
83
|
+
# because then it behaves like without multiplexing and can create problems for
|
84
|
+
# users running multiplexed subscription groups with multiple topics
|
85
|
+
virtual do |data, errors|
|
86
|
+
next unless errors.empty?
|
87
|
+
next unless min(data)
|
88
|
+
next unless max(data)
|
89
|
+
|
90
|
+
min = min(data)
|
91
|
+
max = max(data)
|
92
|
+
|
93
|
+
next unless min == 1 && max == 1
|
94
|
+
|
95
|
+
[[%w[subscription_group_details], :multiplexing_one_not_enough]]
|
96
|
+
end
|
97
|
+
|
81
98
|
class << self
|
82
99
|
# @param data [Hash] topic details
|
83
100
|
# @return [Integer, false] min or false if missing
|