karafka 2.4.18 → 2.5.0.beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/CODEOWNERS +3 -0
- data/.github/workflows/ci.yml +58 -14
- data/.github/workflows/push.yml +36 -0
- data/.github/workflows/verify-action-pins.yml +16 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +60 -0
- data/Gemfile +2 -2
- data/Gemfile.lock +69 -50
- data/LICENSE-COMM +2 -2
- data/README.md +1 -1
- data/Rakefile +4 -0
- data/bin/clean_kafka +43 -0
- data/bin/integrations +19 -6
- data/bin/rspecs +15 -3
- data/bin/verify_kafka_warnings +35 -0
- data/bin/verify_topics_naming +27 -0
- data/config/locales/errors.yml +3 -0
- data/config/locales/pro_errors.yml +13 -2
- data/docker-compose.yml +1 -1
- data/examples/payloads/json/enrollment_event.json +579 -0
- data/examples/payloads/json/ingestion_event.json +30 -0
- data/examples/payloads/json/transaction_event.json +17 -0
- data/examples/payloads/json/user_event.json +11 -0
- data/karafka.gemspec +3 -8
- data/lib/karafka/active_job/current_attributes.rb +1 -1
- data/lib/karafka/admin/acl.rb +5 -1
- data/lib/karafka/admin/configs.rb +5 -1
- data/lib/karafka/admin.rb +69 -34
- data/lib/karafka/base_consumer.rb +17 -8
- data/lib/karafka/cli/base.rb +8 -2
- data/lib/karafka/cli/topics/align.rb +7 -4
- data/lib/karafka/cli/topics/base.rb +17 -0
- data/lib/karafka/cli/topics/create.rb +9 -7
- data/lib/karafka/cli/topics/delete.rb +4 -2
- data/lib/karafka/cli/topics/help.rb +39 -0
- data/lib/karafka/cli/topics/repartition.rb +4 -2
- data/lib/karafka/cli/topics.rb +10 -3
- data/lib/karafka/cli.rb +2 -0
- data/lib/karafka/connection/client.rb +30 -9
- data/lib/karafka/connection/listener.rb +24 -12
- data/lib/karafka/connection/messages_buffer.rb +1 -1
- data/lib/karafka/connection/proxy.rb +3 -0
- data/lib/karafka/constraints.rb +3 -3
- data/lib/karafka/contracts/config.rb +3 -0
- data/lib/karafka/contracts/topic.rb +1 -1
- data/lib/karafka/errors.rb +46 -2
- data/lib/karafka/helpers/async.rb +3 -1
- data/lib/karafka/instrumentation/callbacks/rebalance.rb +5 -1
- data/lib/karafka/instrumentation/logger_listener.rb +86 -23
- data/lib/karafka/instrumentation/proctitle_listener.rb +5 -1
- data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +2 -2
- data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
- data/lib/karafka/pro/cleaner.rb +8 -0
- data/lib/karafka/pro/cli/parallel_segments/base.rb +89 -0
- data/lib/karafka/pro/cli/parallel_segments/collapse.rb +164 -0
- data/lib/karafka/pro/cli/parallel_segments/distribute.rb +164 -0
- data/lib/karafka/pro/cli/parallel_segments.rb +60 -0
- data/lib/karafka/pro/connection/manager.rb +5 -8
- data/lib/karafka/pro/encryption.rb +8 -0
- data/lib/karafka/pro/instrumentation/performance_tracker.rb +1 -1
- data/lib/karafka/pro/iterator/expander.rb +5 -3
- data/lib/karafka/pro/iterator/tpl_builder.rb +23 -0
- data/lib/karafka/pro/loader.rb +10 -0
- data/lib/karafka/pro/processing/coordinator.rb +4 -1
- data/lib/karafka/pro/processing/coordinators/errors_tracker.rb +27 -3
- data/lib/karafka/pro/processing/coordinators/filters_applier.rb +11 -0
- data/lib/karafka/pro/processing/filters/base.rb +10 -2
- data/lib/karafka/pro/processing/filters/expirer.rb +5 -0
- data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb +2 -2
- data/lib/karafka/pro/processing/filters/virtual_limiter.rb +5 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/base.rb +73 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/default.rb +85 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/mom.rb +66 -0
- data/lib/karafka/pro/processing/partitioner.rb +1 -13
- data/lib/karafka/pro/processing/piping/consumer.rb +13 -13
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/default.rb +36 -8
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +14 -10
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +3 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/ftr/default.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +4 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
- data/lib/karafka/pro/processing/virtual_partitions/distributors/balanced.rb +50 -0
- data/lib/karafka/pro/processing/virtual_partitions/distributors/base.rb +29 -0
- data/lib/karafka/pro/processing/virtual_partitions/distributors/consistent.rb +27 -0
- data/lib/karafka/pro/recurring_tasks/contracts/config.rb +8 -4
- data/lib/karafka/pro/recurring_tasks/dispatcher.rb +3 -3
- data/lib/karafka/pro/recurring_tasks/setup/config.rb +7 -2
- data/lib/karafka/pro/recurring_tasks.rb +13 -0
- data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +1 -1
- data/lib/karafka/pro/routing/features/multiplexing/config.rb +1 -0
- data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +17 -0
- data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +5 -2
- data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +8 -1
- data/lib/karafka/pro/routing/features/parallel_segments/builder.rb +47 -0
- data/lib/karafka/pro/routing/features/parallel_segments/config.rb +27 -0
- data/lib/karafka/pro/routing/features/parallel_segments/consumer_group.rb +83 -0
- data/lib/karafka/pro/routing/features/parallel_segments/contracts/consumer_group.rb +49 -0
- data/lib/karafka/pro/routing/features/parallel_segments/topic.rb +43 -0
- data/lib/karafka/pro/routing/features/parallel_segments.rb +24 -0
- data/lib/karafka/pro/routing/features/patterns/pattern.rb +1 -1
- data/lib/karafka/pro/routing/features/recurring_tasks/builder.rb +2 -2
- data/lib/karafka/pro/routing/features/scheduled_messages/builder.rb +10 -6
- data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +20 -2
- data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +8 -2
- data/lib/karafka/pro/scheduled_messages/consumer.rb +19 -21
- data/lib/karafka/pro/scheduled_messages/daily_buffer.rb +9 -6
- data/lib/karafka/pro/scheduled_messages/deserializers/headers.rb +7 -1
- data/lib/karafka/pro/scheduled_messages/max_epoch.rb +15 -6
- data/lib/karafka/pro/scheduled_messages.rb +13 -0
- data/lib/karafka/processing/coordinators_buffer.rb +1 -0
- data/lib/karafka/processing/strategies/default.rb +4 -4
- data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
- data/lib/karafka/routing/subscription_group.rb +1 -1
- data/lib/karafka/runner.rb +7 -1
- data/lib/karafka/server.rb +19 -19
- data/lib/karafka/setup/attributes_map.rb +2 -0
- data/lib/karafka/setup/config.rb +22 -1
- data/lib/karafka/setup/defaults_injector.rb +26 -1
- data/lib/karafka/status.rb +6 -1
- data/lib/karafka/swarm/node.rb +31 -0
- data/lib/karafka/swarm/supervisor.rb +4 -0
- data/lib/karafka/templates/karafka.rb.erb +14 -1
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +17 -9
- data/renovate.json +14 -2
- metadata +40 -40
- checksums.yaml.gz.sig +0 -0
- data/certs/cert.pem +0 -26
- data.tar.gz.sig +0 -0
- metadata.gz.sig +0 -0
@@ -54,9 +54,9 @@ module Karafka
   @applied = true
 end
 
-# @return [Integer] ms timeout in case of pause
+# @return [Integer, nil] ms timeout in case of pause or nil if not delaying
 def timeout
-  @cursor && applied? ? PAUSE_TIMEOUT :
+  @cursor && applied? ? PAUSE_TIMEOUT : nil
 end
 
 # Pause when we had to back-off or skip if delay is not needed
@@ -0,0 +1,73 @@
+# frozen_string_literal: true
+
+# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+# See LICENSE for details.
+
+module Karafka
+  module Pro
+    module Processing
+      module ParallelSegments
+        # Module for filters injected into the processing pipeline of each of the topics used
+        # within the parallel segmented consumer groups
+        module Filters
+          # Base class for filters for parallel segments that deal with different feature scenarios
+          class Base < Processing::Filters::Base
+            # @param segment_id [Integer] numeric id of the parallel segment group to use with the
+            # partitioner and reducer for segment matching comparison
+            # @param partitioner [Proc]
+            # @param reducer [Proc]
+            def initialize(segment_id:, partitioner:, reducer:)
+              super()
+
+              @segment_id = segment_id
+              @partitioner = partitioner
+              @reducer = reducer
+            end
+
+            private
+
+            # @param message [Karafka::Messages::Message] received message
+            # @return [String, Numeric] segment assignment key
+            def partition(message)
+              @partitioner.call(message)
+            rescue StandardError => e
+              # This should not happen. If you are seeing this it means your partitioner code
+              # failed and raised an error. We highly recommend mitigating partitioner level errors
+              # on the user side because this type of collapse should be considered a last resort
+              Karafka.monitor.instrument(
+                'error.occurred',
+                caller: self,
+                error: e,
+                message: message,
+                type: 'parallel_segments.partitioner.error'
+              )
+
+              :failure
+            end
+
+            # @param message_segment_key [String, Numeric] segment key to pass to the reducer
+            # @return [Integer] segment assignment of a given message
+            def reduce(message_segment_key)
+              # Assign to segment 0 always in case of failures in partitioner
+              # This is a fail-safe
+              return 0 if message_segment_key == :failure
+
+              @reducer.call(message_segment_key)
+            rescue StandardError => e
+              # @see `#partition` method error handling doc
+              Karafka.monitor.instrument(
+                'error.occurred',
+                caller: self,
+                error: e,
+                message_segment_key: message_segment_key,
+                type: 'parallel_segments.reducer.error'
+              )
+
+              0
+            end
+          end
+        end
+      end
+    end
+  end
+end
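The Base filter above expects the partitioner to map a message to a String or Numeric segment key and the reducer to map that key to an Integer segment id; a filter keeps only the messages whose reduced value equals its own segment_id. A minimal sketch of such a pair (the 'user_id' header and the two-segment modulo split are illustrative assumptions, not part of this diff):

    # Illustrative only: map each message to a key, then to one of 2 segments
    partitioner = ->(message) { message.headers['user_id'] || message.raw_key.to_s }
    reducer     = ->(key) { key.to_s.sum % 2 }

    # A filter built with segment_id: 1 would keep only the messages for which
    # reducer.call(partitioner.call(message)) == 1 and drop the rest.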
@@ -0,0 +1,85 @@
+# frozen_string_literal: true
+
+# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+# See LICENSE for details.
+
+module Karafka
+  module Pro
+    module Processing
+      # Processing components namespace for parallel segments feature
+      module ParallelSegments
+        module Filters
+          # Filter used for handling parallel segments with automatic offset management. Handles
+          # message distribution and ensures proper offset management when messages are filtered
+          # out during the distribution process.
+          #
+          # When operating in automatic offset management mode, this filter takes care of marking
+          # offsets of messages that were filtered out during the distribution process to maintain
+          # proper offset progression.
+          #
+          # @note This is the default filter that should be used when manual offset management
+          # is not enabled. For manual offset management scenarios use the Mom filter instead.
+          class Default < Base
+            # Applies the filter to the batch of messages
+            # It removes messages that don't belong to the current parallel segment group
+            # based on the partitioner and reducer logic
+            #
+            # @param messages [Array<Karafka::Messages::Message>] messages batch that we want to
+            # filter
+            def apply!(messages)
+              @applied = false
+              @all_filtered = false
+              @cursor = messages.first
+
+              # Keep track of how many messages we had initially
+              initial_size = messages.size
+
+              # Filter out messages that don't match our segment group
+              messages.delete_if do |message|
+                message_segment_key = partition(message)
+
+                # Use the reducer to get the target group for this message
+                target_segment = reduce(message_segment_key)
+
+                # Remove the message if it doesn't belong to our group
+                remove = target_segment != @segment_id
+
+                if remove
+                  @cursor = message
+                  @applied = true
+                end
+
+                remove
+              end
+
+              # If all messages were filtered out, we want to mark them as consumed
+              @all_filtered = messages.empty? && initial_size.positive?
+            end
+
+            # @return [Boolean] true if any messages were filtered out
+            def applied?
+              @applied
+            end
+
+            # @return [Boolean] true if we should mark as consumed (when all were filtered)
+            def mark_as_consumed?
+              @all_filtered
+            end
+
+            # @return [nil] Since we do not timeout ever in this filter, we should not return
+            # any value for it.
+            def timeout
+              nil
+            end
+
+            # Only return cursor if we wanted to mark as consumed in case all was filtered.
+            # Otherwise it could interfere with other filters
+            def cursor
+              @all_filtered ? @cursor : nil
+            end
+          end
+        end
+      end
+    end
+  end
+end
@@ -0,0 +1,66 @@
+# frozen_string_literal: true
+
+# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+# See LICENSE for details.
+
+module Karafka
+  module Pro
+    module Processing
+      module ParallelSegments
+        module Filters
+          # Filter used for handling parallel segments when manual offset management (mom) is
+          # enabled. Provides message distribution without any post-filtering offset state
+          # management as it is fully user-based.
+          #
+          # Since with manual offset management we need to ensure that offsets are never marked
+          # even in cases where all data in a batch is filtered out.
+          #
+          # This separation allows for cleaner implementation and easier debugging of each flow.
+          #
+          # @note This filter should be used only when manual offset management is enabled.
+          # For automatic offset management scenarios use the regular filter instead.
+          class Mom < Base
+            # Applies the filter to the batch of messages
+            # It removes messages that don't belong to the current parallel segment group
+            # based on the partitioner and reducer logic without any offset marking
+            #
+            # @param messages [Array<Karafka::Messages::Message>] messages batch that we want to
+            # filter
+            def apply!(messages)
+              @applied = false
+
+              # Filter out messages that don't match our segment group
+              messages.delete_if do |message|
+                message_segment_key = partition(message)
+                # Use the reducer to get the target group for this message
+                target_segment = reduce(message_segment_key)
+                # Remove the message if it doesn't belong to our segment
+                remove = target_segment != @segment_id
+
+                @applied = true if remove
+
+                remove
+              end
+            end
+
+            # @return [Boolean] true if any messages were filtered out
+            def applied?
+              @applied
+            end
+
+            # @return [Boolean] false, as mom mode never marks as consumed automatically
+            def mark_as_consumed?
+              false
+            end
+
+            # @return [nil] Since we do not timeout ever in this filter, we should not return
+            # any value for it.
+            def timeout
+              nil
+            end
+          end
+        end
+      end
+    end
+  end
+end
@@ -38,19 +38,7 @@ module Karafka
 # reduce the whole set into one partition and emit error. This should still allow for
 # user flow but should mitigate damages by not virtualizing
 begin
-  groupings = messages
-  # We need to reduce it to the max concurrency, so the group_id is not a direct
-  # effect of the end user action. Otherwise the persistence layer for consumers
-  # would cache it forever and it would cause memory leaks
-  #
-  # This also needs to be consistent because the aggregation here needs to warrant,
-  # that the same partitioned message will always be assigned to the same virtual
-  # partition. Otherwise in case of a window aggregation with VP spanning across
-  # several polls, the data could not be complete.
-  vps.reducer.call(
-    vps.partitioner.call(msg)
-  )
-  end
+  groupings = vps.distributor.call(messages)
 rescue StandardError => e
 # This should not happen. If you are seeing this it means your partitioner code
 # failed and raised an error. We highly recommend mitigating partitioner level errors
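The hunk above replaces the inline group_by/reducer logic with `vps.distributor.call(messages)`, backed by the new balanced and consistent distributor classes added in this release. Assuming the routing layer exposes the choice as a `distribution` option on `virtual_partitions` (an assumption based on the virtual partitions config files touched in this diff, not shown verbatim here), the setup might look like:

    # Hedged sketch: EventsConsumer, the topic name and the 'user_id' header are placeholders
    class KarafkaApp < Karafka::App
      routes.draw do
        topic :events do
          consumer EventsConsumer
          virtual_partitions(
            partitioner: ->(message) { message.headers['user_id'] },
            max_partitions: 5,
            distribution: :balanced
          )
        end
      end
    end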
@@ -20,16 +20,16 @@ module Karafka
 
 # Pipes given message to the provided topic with expected details. Useful for
 # pass-through operations where deserialization is not needed. Upon usage it will include
-# all the
+# all the source headers + meta headers about the source of message.
 #
 # @param topic [String, Symbol] where we want to send the message
-# @param message [Karafka::Messages::Message]
+# @param message [Karafka::Messages::Message] source message to pipe
 #
 # @note It will NOT deserialize the payload so it is fast
 #
 # @note We assume that there can be different number of partitions in the target topic,
-# this is why we use `key` based on the
-# This will not utilize partitions beyond the number of partitions of
+# this is why we use `key` based on the source topic key and not the partition id.
+# This will not utilize partitions beyond the number of partitions of source topic,
 # but will accommodate for topics with less partitions.
 def pipe_async(topic:, message:)
   produce_async(
@@ -40,7 +40,7 @@ module Karafka
 # Sync version of pipe for one message
 #
 # @param topic [String, Symbol] where we want to send the message
-# @param message [Karafka::Messages::Message]
+# @param message [Karafka::Messages::Message] source message to pipe
 # @see [#pipe_async]
 def pipe_sync(topic:, message:)
   produce_sync(
@@ -51,7 +51,7 @@ module Karafka
 # Async multi-message pipe
 #
 # @param topic [String, Symbol] where we want to send the message
-# @param messages [Array<Karafka::Messages::Message>]
+# @param messages [Array<Karafka::Messages::Message>] source messages to pipe
 #
 # @note If transactional producer in use and dispatch is not wrapped with a transaction,
 # it will automatically wrap the dispatch with a transaction
@@ -66,7 +66,7 @@ module Karafka
 # Sync multi-message pipe
 #
 # @param topic [String, Symbol] where we want to send the message
-# @param messages [Array<Karafka::Messages::Message>]
+# @param messages [Array<Karafka::Messages::Message>] source messages to pipe
 #
 # @note If transactional producer in use and dispatch is not wrapped with a transaction,
 # it will automatically wrap the dispatch with a transaction
@@ -81,7 +81,7 @@ module Karafka
 private
 
 # @param topic [String, Symbol] where we want to send the message
-# @param message [Karafka::Messages::Message]
+# @param message [Karafka::Messages::Message] source message to pipe
 # @return [Hash] hash with message to pipe.
 #
 # @note If you need to alter this, please define the `#enhance_pipe_message` method
@@ -90,17 +90,17 @@ module Karafka
   topic: topic,
   payload: message.raw_payload,
   headers: message.raw_headers.merge(
-    '
-    '
-    '
-    '
+    'source_topic' => message.topic,
+    'source_partition' => message.partition.to_s,
+    'source_offset' => message.offset.to_s,
+    'source_consumer_group' => self.topic.consumer_group.id
   )
 }
 
 # Use a key only if key was provided
 if message.raw_key
   pipe_message[:key] = message.raw_key
-# Otherwise pipe creating a key that will assign it based on the
+# Otherwise pipe creating a key that will assign it based on the source partition
 # number
 else
   pipe_message[:key] = message.partition.to_s
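With this change, piped messages carry source_topic, source_partition, source_offset and source_consumer_group headers. A downstream consumer could read them as below (the consumer class and log line are illustrative, not part of the gem):

    class AuditConsumer < Karafka::BaseConsumer
      def consume
        messages.each do |message|
          origin = message.headers['source_topic']
          offset = message.headers['source_offset']

          Karafka.logger.info("Piped from #{origin} at offset #{offset}")
        end
      end
    end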
@@ -40,7 +40,7 @@ module Karafka
 mark_as_consumed(last_group_message) unless revoked?
 # no need to check for manual seek because AJ consumer is internal and
 # fully controlled by us
-seek(seek_offset, false) unless revoked?
+seek(seek_offset, false, reset_offset: false) unless revoked?
 
 resume
 else
@@ -55,14 +55,19 @@ module Karafka
 # seek offset can be nil only in case `#seek` was invoked with offset reset request
 # In case like this we ignore marking
 return true if seek_offset.nil?
-# Ignore
-
+# Ignore if it is the same offset as the one that is marked currently
+# We ignore second marking because it changes nothing and in case of people using
+# metadata storage but with automatic offset marking, this would cause metadata to be
+# erased by automatic marking
+return true if (seek_offset - 1) == message.offset
 return false if revoked?
 
 # If we are not inside a transaction but this is a transactional topic, we mark with
 # artificially created transaction
 stored = if producer.transactional?
   mark_with_transaction(message, offset_metadata, true)
+elsif @_transactional_marking
+  raise Errors::NonTransactionalMarkingAttemptError
 else
   client.mark_as_consumed(message, offset_metadata)
 end
@@ -92,14 +97,19 @@ module Karafka
 # seek offset can be nil only in case `#seek` was invoked with offset reset request
 # In case like this we ignore marking
 return true if seek_offset.nil?
-# Ignore
-
+# Ignore if it is the same offset as the one that is marked currently
+# We ignore second marking because it changes nothing and in case of people using
+# metadata storage but with automatic offset marking, this would cause metadata to be
+# erased by automatic marking
+return true if (seek_offset - 1) == message.offset
 return false if revoked?
 
 # If we are not inside a transaction but this is a transactional topic, we mark with
 # artificially created transaction
 stored = if producer.transactional?
   mark_with_transaction(message, offset_metadata, false)
+elsif @_transactional_marking
+  raise Errors::NonTransactionalMarkingAttemptError
 else
   client.mark_as_consumed!(message, offset_metadata)
 end
@@ -143,6 +153,7 @@ module Karafka
 self.producer = active_producer
 
 transaction_started = false
+transaction_completed = false
 
 # Prevent from nested transactions. It would not make any sense
 raise Errors::TransactionAlreadyInitializedError if @_in_transaction
@@ -159,6 +170,12 @@ module Karafka
 # transaction. We do it only for transactions that contain offset management as for
 # producer only, this is not relevant.
 raise Errors::AssignmentLostError if @_in_transaction_marked && revoked?
+
+# If we do not reach this, we should not move seek offsets because it means that
+# either an error occured or transaction was aborted.
+# In case of error, it will bubble up so no issue but in case of abort, while we
+# do not reach this place, the code will continue
+transaction_completed = true
 end
 
 @_in_transaction = false
@@ -180,8 +197,13 @@ module Karafka
 # to mimic this
 # - Complex strategies like VPs can use this in VPs to mark in parallel without
 # having to redefine the transactional flow completely
-
-
+#
+# @note This should be applied only if transaction did not error and if it was not
+# aborted.
+if transaction_completed
+  @_transaction_marked.each do |marking|
+    marking.pop ? mark_as_consumed(*marking) : mark_as_consumed!(*marking)
+  end
 end
 
 true
@@ -213,6 +235,9 @@ module Karafka
   offset_metadata
 )
 
+# This one is long lived and used to make sure, that users do not mix transactional
+# marking with non-transactional. When this happens we should raise error
+@_transactional_marking = true
 @_in_transaction_marked = true
 @_transaction_marked ||= []
 @_transaction_marked << [message, offset_metadata, async]
@@ -252,8 +277,11 @@ module Karafka
 # seek offset can be nil only in case `#seek` was invoked with offset reset request
 # In case like this we ignore marking
 return true if seek_offset.nil?
-# Ignore
-
+# Ignore if it is the same offset as the one that is marked currently
+# We ignore second marking because it changes nothing and in case of people using
+# metadata storage but with automatic offset marking, this would cause metadata to be
+# erased by automatic marking
+return true if (seek_offset - 1) == message.offset
 return false if revoked?
 
 # If we have already marked this successfully in a transaction that was running
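Taken together, these hunks add a long-lived `@_transactional_marking` flag so that transactional and non-transactional offset marking are not silently mixed: once offsets have been marked through a consumer `transaction` block, a later marking attempt with a non-transactional producer raises `Errors::NonTransactionalMarkingAttemptError`. A hedged sketch of the distinction (the consumer body and the TRANSACTIONAL_PRODUCER constant are assumptions for illustration):

    def consume
      # transaction accepts an alternative producer, per the active_producer handling above
      transaction(TRANSACTIONAL_PRODUCER) do
        mark_as_consumed(messages.first)
      end

      # With the default, non-transactional producer still in use, a later call like the
      # one below would now raise Errors::NonTransactionalMarkingAttemptError:
      # mark_as_consumed(messages.last)
    end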
@@ -145,19 +145,19 @@ module Karafka
 # @param skippable_message [Array<Karafka::Messages::Message>]
 # @return [Hash] dispatch DLQ message
 def build_dlq_message(skippable_message)
-
+  source_partition = skippable_message.partition.to_s
 
   dlq_message = {
-    topic: topic.dead_letter_queue.topic,
-    key:
+    topic: @_dispatch_to_dlq_topic || topic.dead_letter_queue.topic,
+    key: source_partition,
     payload: skippable_message.raw_payload,
     headers: skippable_message.raw_headers.merge(
-      '
-      '
-      '
-      '
-      '
-      '
+      'source_topic' => topic.name,
+      'source_partition' => source_partition,
+      'source_offset' => skippable_message.offset.to_s,
+      'source_consumer_group' => topic.consumer_group.id,
+      'source_key' => skippable_message.raw_key.to_s,
+      'source_attempts' => attempt.to_s
     )
   }
 
@@ -205,7 +205,7 @@ module Karafka
 # In case of `:skip` and `:dispatch` will run the exact flow provided in a block
 # In case of `:retry` always `#retry_after_pause` is applied
 def apply_dlq_flow
-  flow = topic.dead_letter_queue.strategy.call(errors_tracker, attempt)
+  flow, target_topic = topic.dead_letter_queue.strategy.call(errors_tracker, attempt)
 
   case flow
   when :retry
@@ -216,6 +216,8 @@ module Karafka
   @_dispatch_to_dlq = false
 when :dispatch
   @_dispatch_to_dlq = true
+  # Use custom topic if it was returned from the strategy
+  @_dispatch_to_dlq_topic = target_topic || topic.dead_letter_queue.topic
 else
   raise Karafka::UnsupportedCaseError, flow
 end
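`apply_dlq_flow` now destructures the strategy result into `flow, target_topic`, so a custom DLQ strategy can pick the dispatch topic per error and fall back to `topic.dead_letter_queue.topic` when no topic is returned. A hedged sketch (the class name, the topic names and the `errors_tracker.last` call are assumptions, not part of the diff):

    class RoutingDlqStrategy
      # @param errors_tracker [Object] Pro errors tracker (assumed to expose #last)
      # @param attempt [Integer] current processing attempt
      def call(errors_tracker, attempt)
        return [:retry] if attempt < 3

        if errors_tracker.last.is_a?(JSON::ParserError)
          [:dispatch, 'events_dlq_malformed']
        else
          [:dispatch]
        end
      end
    end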
@@ -227,6 +229,8 @@ module Karafka
 
 # Always backoff after DLQ dispatch even on skip to prevent overloads on errors
 pause(seek_offset, nil, false)
+ensure
+  @_dispatch_to_dlq_topic = nil
 end
 
 # Marks message that went to DLQ (if applicable) based on the requested method
@@ -35,7 +35,7 @@ module Karafka
 if coordinator.filtered? && !revoked?
   handle_post_filtering
 elsif !revoked? && !coordinator.manual_seek?
-  seek(last_group_message.offset + 1, false)
+  seek(last_group_message.offset + 1, false, reset_offset: false)
   resume
 else
   resume
@@ -31,7 +31,9 @@ module Karafka
 
 mark_as_consumed(last_group_message) unless revoked?
 # We should not overwrite user manual seel request with our seek
-
+unless revoked? || coordinator.manual_seek?
+  seek(seek_offset, false, reset_offset: false)
+end
 
 resume
 else
@@ -43,7 +43,10 @@ module Karafka
 return if coordinator.manual_pause?
 
 mark_as_consumed(last_group_message) unless revoked?
-
+
+unless revoked? || coordinator.manual_seek?
+  seek(seek_offset, false, reset_offset: false)
+end
 
 resume
 else