karafka 2.4.18 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/CODEOWNERS +3 -0
- data/.github/workflows/ci.yml +59 -15
- data/.github/workflows/push.yml +35 -0
- data/.github/workflows/verify-action-pins.yml +16 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +75 -0
- data/Gemfile +2 -2
- data/Gemfile.lock +72 -53
- data/LICENSE-COMM +2 -2
- data/README.md +1 -1
- data/Rakefile +4 -0
- data/bin/clean_kafka +43 -0
- data/bin/integrations +20 -6
- data/bin/rspecs +15 -3
- data/bin/verify_kafka_warnings +35 -0
- data/bin/verify_topics_naming +27 -0
- data/config/locales/errors.yml +5 -1
- data/config/locales/pro_errors.yml +13 -2
- data/docker-compose.yml +1 -1
- data/examples/payloads/avro/.gitkeep +0 -0
- data/examples/payloads/json/sample_set_01/enrollment_event.json +579 -0
- data/examples/payloads/json/sample_set_01/ingestion_event.json +30 -0
- data/examples/payloads/json/sample_set_01/transaction_event.json +17 -0
- data/examples/payloads/json/sample_set_01/user_event.json +11 -0
- data/karafka.gemspec +3 -8
- data/lib/karafka/active_job/current_attributes.rb +1 -1
- data/lib/karafka/active_job/job_extensions.rb +4 -1
- data/lib/karafka/admin/acl.rb +5 -1
- data/lib/karafka/admin/configs.rb +5 -1
- data/lib/karafka/admin.rb +89 -42
- data/lib/karafka/base_consumer.rb +17 -8
- data/lib/karafka/cli/base.rb +8 -2
- data/lib/karafka/cli/topics/align.rb +7 -4
- data/lib/karafka/cli/topics/base.rb +17 -0
- data/lib/karafka/cli/topics/create.rb +9 -7
- data/lib/karafka/cli/topics/delete.rb +4 -2
- data/lib/karafka/cli/topics/help.rb +39 -0
- data/lib/karafka/cli/topics/repartition.rb +4 -2
- data/lib/karafka/cli/topics.rb +10 -3
- data/lib/karafka/cli.rb +2 -0
- data/lib/karafka/connection/client.rb +39 -9
- data/lib/karafka/connection/listener.rb +24 -12
- data/lib/karafka/connection/messages_buffer.rb +1 -1
- data/lib/karafka/connection/proxy.rb +4 -1
- data/lib/karafka/constraints.rb +3 -3
- data/lib/karafka/contracts/base.rb +3 -2
- data/lib/karafka/contracts/config.rb +5 -1
- data/lib/karafka/contracts/topic.rb +1 -1
- data/lib/karafka/errors.rb +46 -2
- data/lib/karafka/helpers/async.rb +3 -1
- data/lib/karafka/helpers/interval_runner.rb +8 -0
- data/lib/karafka/instrumentation/callbacks/rebalance.rb +5 -1
- data/lib/karafka/instrumentation/logger_listener.rb +95 -32
- data/lib/karafka/instrumentation/proctitle_listener.rb +5 -1
- data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +2 -2
- data/lib/karafka/instrumentation/vendors/kubernetes/base_listener.rb +17 -2
- data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +29 -6
- data/lib/karafka/instrumentation/vendors/kubernetes/swarm_liveness_listener.rb +9 -0
- data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
- data/lib/karafka/pro/cleaner.rb +8 -0
- data/lib/karafka/pro/cli/parallel_segments/base.rb +89 -0
- data/lib/karafka/pro/cli/parallel_segments/collapse.rb +164 -0
- data/lib/karafka/pro/cli/parallel_segments/distribute.rb +164 -0
- data/lib/karafka/pro/cli/parallel_segments.rb +60 -0
- data/lib/karafka/pro/connection/manager.rb +5 -8
- data/lib/karafka/pro/encryption.rb +12 -1
- data/lib/karafka/pro/instrumentation/performance_tracker.rb +1 -1
- data/lib/karafka/pro/iterator/expander.rb +5 -3
- data/lib/karafka/pro/iterator/tpl_builder.rb +23 -0
- data/lib/karafka/pro/loader.rb +10 -0
- data/lib/karafka/pro/processing/coordinator.rb +4 -1
- data/lib/karafka/pro/processing/coordinators/errors_tracker.rb +32 -3
- data/lib/karafka/pro/processing/coordinators/filters_applier.rb +11 -0
- data/lib/karafka/pro/processing/filters/base.rb +10 -2
- data/lib/karafka/pro/processing/filters/expirer.rb +5 -0
- data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb +2 -2
- data/lib/karafka/pro/processing/filters/virtual_limiter.rb +5 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/base.rb +73 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/default.rb +85 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/mom.rb +66 -0
- data/lib/karafka/pro/processing/partitioner.rb +1 -13
- data/lib/karafka/pro/processing/piping/consumer.rb +13 -13
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/default.rb +36 -8
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +15 -10
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +3 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/ftr/default.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +4 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
- data/lib/karafka/pro/processing/virtual_partitions/distributors/balanced.rb +50 -0
- data/lib/karafka/pro/processing/virtual_partitions/distributors/base.rb +29 -0
- data/lib/karafka/pro/processing/virtual_partitions/distributors/consistent.rb +27 -0
- data/lib/karafka/pro/recurring_tasks/contracts/config.rb +8 -4
- data/lib/karafka/pro/recurring_tasks/dispatcher.rb +3 -3
- data/lib/karafka/pro/recurring_tasks/setup/config.rb +7 -2
- data/lib/karafka/pro/recurring_tasks.rb +21 -2
- data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +1 -1
- data/lib/karafka/pro/routing/features/multiplexing/config.rb +1 -0
- data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +17 -0
- data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +5 -2
- data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +8 -1
- data/lib/karafka/pro/routing/features/parallel_segments/builder.rb +47 -0
- data/lib/karafka/pro/routing/features/parallel_segments/config.rb +27 -0
- data/lib/karafka/pro/routing/features/parallel_segments/consumer_group.rb +83 -0
- data/lib/karafka/pro/routing/features/parallel_segments/contracts/consumer_group.rb +49 -0
- data/lib/karafka/pro/routing/features/parallel_segments/topic.rb +43 -0
- data/lib/karafka/pro/routing/features/parallel_segments.rb +24 -0
- data/lib/karafka/pro/routing/features/patterns/pattern.rb +1 -1
- data/lib/karafka/pro/routing/features/recurring_tasks/builder.rb +2 -2
- data/lib/karafka/pro/routing/features/scheduled_messages/builder.rb +10 -6
- data/lib/karafka/pro/routing/features/swarm/contracts/routing.rb +3 -2
- data/lib/karafka/pro/routing/features/swarm.rb +4 -1
- data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +20 -2
- data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +8 -2
- data/lib/karafka/pro/scheduled_messages/consumer.rb +61 -26
- data/lib/karafka/pro/scheduled_messages/daily_buffer.rb +9 -6
- data/lib/karafka/pro/scheduled_messages/deserializers/headers.rb +7 -1
- data/lib/karafka/pro/scheduled_messages/dispatcher.rb +2 -1
- data/lib/karafka/pro/scheduled_messages/max_epoch.rb +15 -6
- data/lib/karafka/pro/scheduled_messages/proxy.rb +15 -3
- data/lib/karafka/pro/scheduled_messages/serializer.rb +2 -4
- data/lib/karafka/pro/scheduled_messages/state.rb +20 -23
- data/lib/karafka/pro/scheduled_messages/tracker.rb +34 -8
- data/lib/karafka/pro/scheduled_messages.rb +17 -1
- data/lib/karafka/processing/coordinators_buffer.rb +1 -0
- data/lib/karafka/processing/strategies/default.rb +4 -4
- data/lib/karafka/routing/builder.rb +12 -3
- data/lib/karafka/routing/features/base/expander.rb +8 -2
- data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
- data/lib/karafka/routing/subscription_group.rb +1 -1
- data/lib/karafka/runner.rb +7 -1
- data/lib/karafka/server.rb +21 -18
- data/lib/karafka/setup/attributes_map.rb +2 -0
- data/lib/karafka/setup/config.rb +40 -7
- data/lib/karafka/setup/defaults_injector.rb +26 -1
- data/lib/karafka/status.rb +6 -1
- data/lib/karafka/swarm/node.rb +31 -0
- data/lib/karafka/swarm/supervisor.rb +9 -2
- data/lib/karafka/templates/karafka.rb.erb +14 -1
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +17 -9
- data/renovate.json +14 -2
- metadata +41 -40
- checksums.yaml.gz.sig +0 -0
- data/certs/cert.pem +0 -26
- data.tar.gz.sig +0 -0
- metadata.gz.sig +0 -0
data/lib/karafka/pro/encryption.rb
CHANGED
@@ -22,7 +22,10 @@ module Karafka
 
       # @param config [Karafka::Core::Configurable::Node] root node config
       def post_setup(config)
-        Encryption::Contracts::Config.new.validate!(
+        Encryption::Contracts::Config.new.validate!(
+          config.to_h,
+          scope: %w[config]
+        )
 
         # Don't inject extra components if encryption is not active
         return unless config.encryption.active
@@ -33,6 +36,14 @@ module Karafka
         # Encryption for WaterDrop
         config.producer.middleware.append(Messages::Middleware.new)
       end
+
+      # This feature does not need any changes post-fork
+      #
+      # @param _config [Karafka::Core::Configurable::Node]
+      # @param _pre_fork_producer [WaterDrop::Producer]
+      def post_fork(_config, _pre_fork_producer)
+        true
+      end
     end
   end
 end
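The no-op above completes the per-feature fork API that the loader dispatches (see the loader hunk below). For a feature that does hold process-wide state, a hedged sketch of what a non-trivial hook could look like; `MyFeature` and its config node are illustrative, not part of Karafka:

```ruby
# Hypothetical Pro-style feature; only the post_fork contract mirrors the diff above
module MyFeature
  class << self
    # @param config [Karafka::Core::Configurable::Node]
    # @param pre_fork_producer [WaterDrop::Producer]
    def post_fork(config, pre_fork_producer)
      # A forked node must not reuse the parent's producer connections, so
      # rebuild the reference if this feature captured the pre-fork producer
      return true unless config.producer.equal?(pre_fork_producer)

      config.producer = ::WaterDrop::Producer.new do |producer_config|
        producer_config.kafka = config.kafka.dup
      end

      true
    end
  end
end
```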
data/lib/karafka/pro/iterator/expander.rb
CHANGED
@@ -21,8 +21,10 @@ module Karafka
     #   - { 'topic1' => 100 } - means we run all partitions from the offset 100
     #   - { 'topic1' => Time.now - 60 } - we run all partitions from the message from 60s ago
     #   - { 'topic1' => { 1 => Time.now - 60 } } - partition1 from message 60s ago
-    #   - { 'topic1' => { 1 => true } } - will pick first offset
-    #   - { 'topic1' => true } - will pick first offset
+    #   - { 'topic1' => { 1 => true } } - will pick first offset on this CG for partition 1
+    #   - { 'topic1' => true } - will pick first offset for all partitions
+    #   - { 'topic1' => :earliest } - will pick earliest offset for all partitions
+    #   - { 'topic1' => :latest } - will pick latest (high-watermark) for all partitions
     class Expander
       # Expands topics to which we want to subscribe with partitions information in case this
       # info is not provided.
@@ -80,7 +82,7 @@ module Karafka
           .find { |topic| topic.fetch(:topic_name) == name }
           .tap { |topic| topic || raise(Errors::TopicNotFoundError, name) }
          .fetch(:partitions)
-          .
+          .size
       end
     end
   end
data/lib/karafka/pro/iterator/tpl_builder.rb
CHANGED
@@ -14,6 +14,11 @@ module Karafka
     # This builder resolves that and builds a tpl to which we can safely subscribe the way
     # we want it.
     class TplBuilder
+      # Supported named offset positions that we can reference via their name
+      SUPPORTED_NAMED_POSITIONS = %w[earliest latest].freeze
+
+      private_constant :SUPPORTED_NAMED_POSITIONS
+
       # @param consumer [::Rdkafka::Consumer] consumer instance needed to talk with Kafka
       # @param expanded_topics [Hash] hash with expanded and normalized topics data
       def initialize(consumer, expanded_topics)
@@ -28,6 +33,7 @@ module Karafka
         resolve_partitions_with_exact_offsets
         resolve_partitions_with_negative_offsets
         resolve_partitions_with_time_offsets
+        resolve_partitions_with_named_offsets
         resolve_partitions_with_cg_expectations
 
         # Final tpl with all the data
@@ -143,6 +149,23 @@ module Karafka
         end
       end
 
+      # If we get named offsets, we can just remap them to librdkafka special offset positions
+      def resolve_partitions_with_named_offsets
+        @expanded_topics.each do |name, partitions|
+          next unless partitions.is_a?(Hash)
+
+          partitions.each do |partition, offset|
+            # Skip offsets that do not match our named expectations
+            named_offset = offset.to_s
+
+            next unless SUPPORTED_NAMED_POSITIONS.include?(named_offset)
+
+            @mapped_topics[name][partition] = -1 if named_offset == 'latest'
+            @mapped_topics[name][partition] = -2 if named_offset == 'earliest'
+          end
+        end
+      end
+
       # Fetches last used offsets for those partitions for which we want to consume from last
       # moment where given consumer group has finished
       # This is indicated by given partition value being set to `true`.
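Together, the expander docs and this resolver mean the Pro Iterator now accepts `:earliest`/`:latest` directly, remapped to librdkafka's special `-2`/`-1` positions. A minimal usage sketch; the topic name is illustrative:

```ruby
# Start from the earliest available offsets on every partition of the topic;
# :latest would instead begin at the current high-watermark
iterator = Karafka::Pro::Iterator.new({ 'events' => :earliest })

iterator.each do |message|
  puts "#{message.partition}:#{message.offset} #{message.raw_payload}"
end
```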
data/lib/karafka/pro/loader.rb
CHANGED
@@ -60,6 +60,15 @@ module Karafka
         Processing::SubscriptionGroupsCoordinator.instance
       end
 
+      # Runs operations needed after fork in swarm for features that need it
+      #
+      # @param config [Karafka::Core::Configurable::Node]
+      # @param pre_fork_producer [WaterDrop::Producer] pre fork producer instance that may be
+      #   needed to be replaced with newly changed one post-fork.
+      def post_fork(config, pre_fork_producer)
+        features.each { |feature| feature.post_fork(config, pre_fork_producer) }
+      end
+
       private
 
       # @return [Array<Module>] extra non-routing related pro features and routing components
@@ -84,6 +93,7 @@ module Karafka
         icfg.connection.manager = Connection::Manager.new
 
         icfg.processing.coordinator_class = Processing::Coordinator
+        icfg.processing.errors_tracker_class = Processing::Coordinators::ErrorsTracker
         icfg.processing.partitioner_class = Processing::Partitioner
         icfg.processing.scheduler_class = Processing::Schedulers::Default
         icfg.processing.jobs_queue_class = Processing::JobsQueue
data/lib/karafka/pro/processing/coordinator.rb
CHANGED
@@ -10,6 +10,9 @@ module Karafka
       # within the same partition
       class Coordinator < ::Karafka::Processing::Coordinator
         extend Forwardable
+        include Helpers::ConfigImporter.new(
+          errors_tracker_class: %i[internal processing errors_tracker_class]
+        )
 
         def_delegators :@collapser, :collapsed?, :collapse_until!
 
@@ -20,7 +23,7 @@ module Karafka
           super
 
           @executed = []
-          @errors_tracker =
+          @errors_tracker = errors_tracker_class.new(topic, partition)
           @flow_mutex = Mutex.new
           # Lock for user code synchronization
           # We do not want to mix coordinator lock with the user lock not to create cases where
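Because the coordinator now resolves the tracker through `internal.processing.errors_tracker_class` (wired up in the loader hunk above), the class is swappable in principle. A hedged sketch against that internal setting; treat it as internal API and the reporter as a placeholder:

```ruby
# Illustrative subclass that forwards each tracked error to an external reporter
class ReportingErrorsTracker < Karafka::Pro::Processing::Coordinators::ErrorsTracker
  def <<(error)
    super
    MyReporter.notify(error) # MyReporter is hypothetical
  end
end

class KarafkaApp < Karafka::App
  setup do |config|
    config.internal.processing.errors_tracker_class = ReportingErrorsTracker
  end
end
```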
data/lib/karafka/pro/processing/coordinators/errors_tracker.rb
CHANGED
@@ -13,25 +13,52 @@ module Karafka
         class ErrorsTracker
           include Enumerable
 
+          # @return [Karafka::Routing::Topic] topic of this error tracker
+          attr_reader :topic
+
+          # @return [Integer] partition of this error tracker
+          attr_reader :partition
+
+          # @return [Hash]
+          attr_reader :counts
+
+          # @return [String]
+          attr_reader :trace_id
+
           # Max errors we keep in memory.
           # We do not want to keep more because for DLQ-less this would cause memory-leaks.
+          # We do however count per class for granular error counting
           STORAGE_LIMIT = 100
 
           private_constant :STORAGE_LIMIT
 
-
+          # @param topic [Karafka::Routing::Topic]
+          # @param partition [Integer]
+          # @param limit [Integer] max number of errors we want to keep for reference when
+          #   implementing custom error handling.
+          # @note `limit` does not apply to the counts. They will work beyond the number of errors
+          #   occurring
+          def initialize(topic, partition, limit: STORAGE_LIMIT)
             @errors = []
+            @counts = Hash.new { |hash, key| hash[key] = 0 }
+            @topic = topic
+            @partition = partition
+            @limit = limit
+            @trace_id = SecureRandom.uuid
           end
 
           # Clears all the errors
           def clear
             @errors.clear
+            @counts.clear
           end
 
           # @param error [StandardError] adds the error to the tracker
           def <<(error)
-            @errors.shift if @errors.size >=
+            @errors.shift if @errors.size >= @limit
             @errors << error
+            @counts[error.class] += 1
+            @trace_id = SecureRandom.uuid
           end
 
           # @return [Boolean] is the error tracker empty
@@ -41,7 +68,9 @@ module Karafka
 
           # @return [Integer] number of elements
           def size
-
+            # We use counts reference of all errors and not the `@errors` array because it allows
+            #   us to go beyond the whole errors storage limit
+            @counts.values.sum
           end
 
           # @return [StandardError, nil] last error that occurred or nil if no errors
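The net effect of these two hunks: the capped `@errors` array keeps at most `limit` recent errors, while `counts` and `size` keep counting past the cap. An illustrative sketch (the tracker is normally built by the coordinator with the assignment's topic and partition):

```ruby
tracker = Karafka::Pro::Processing::Coordinators::ErrorsTracker.new(topic, partition, limit: 2)

3.times { tracker << StandardError.new('boom') }

tracker.counts # => { StandardError => 3 }, counting ignores the storage limit
tracker.size   # => 3, derived from counts, not from the trimmed array
tracker.last   # => the most recent error still held in the capped storage
```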
data/lib/karafka/pro/processing/coordinators/filters_applier.rb
CHANGED
@@ -98,6 +98,17 @@ module Karafka
             :mark_as_consumed
           end
 
+          # The first (lowest) message we want to mark as consumed in marking. By default it uses
+          # same position as cursor in case user wants to mark same message as consumed as the
+          # one on which cursor action is applied.
+          # @return [Karafka::Messages::Message, nil] cursor marking message or nil if none
+          # @note It should not return position in time format, only numerical offset
+          def marking_cursor
+            return nil unless active?
+
+            applied.map(&:marking_cursor).compact.min_by(&:offset)
+          end
+
           private
 
           # @return [Boolean] is filtering active
data/lib/karafka/pro/processing/filters/base.rb
CHANGED
@@ -42,9 +42,11 @@ module Karafka
             @applied
           end
 
-          # @return [Integer] default timeout for pausing (if applicable)
+          # @return [Integer, nil] default timeout for pausing (if applicable) or nil if not
+          # @note Please do not return `0` when your filter is not pausing as it may interact
+          #   with other filters that want to pause.
           def timeout
-
+            nil
           end
 
           # @return [Boolean] should we use the cursor value to mark as consumed. If any of the
@@ -58,6 +60,12 @@ module Karafka
           def marking_method
             :mark_as_consumed
           end
+
+          # @return [Karafka::Messages::Message, nil] cursor message for marking or nil if no
+          #   marking
+          def marking_cursor
+            cursor
+          end
         end
       end
     end
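The refined contract: a non-pausing filter should return `nil` (never `0`) from `#timeout`, and `#marking_cursor` defaults to `#cursor`. A hedged sketch of a custom filter honoring it; the class and header name are illustrative:

```ruby
class SkipFlaggedFilter < Karafka::Pro::Processing::Filters::Base
  # Removes messages flagged for skipping and remembers the last removed one
  # as the cursor, so offsets can still progress past filtered data
  def apply!(messages)
    @applied = false
    @cursor = nil

    messages.delete_if do |message|
      next false unless message.headers.key?('skip')

      @applied = true
      @cursor = message
      true
    end
  end

  def applied?
    @applied
  end

  # nil, not 0, so we never interfere with filters that actually want to pause
  def timeout
    nil
  end
end
```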
data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb
CHANGED
@@ -54,9 +54,9 @@ module Karafka
             @applied = true
           end
 
-          # @return [Integer] ms timeout in case of pause
+          # @return [Integer, nil] ms timeout in case of pause or nil if not delaying
           def timeout
-            @cursor && applied? ? PAUSE_TIMEOUT :
+            @cursor && applied? ? PAUSE_TIMEOUT : nil
           end
 
           # Pause when we had to back-off or skip if delay is not needed
data/lib/karafka/pro/processing/parallel_segments/filters/base.rb
ADDED
@@ -0,0 +1,73 @@
+# frozen_string_literal: true
+
+# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+# See LICENSE for details.
+
+module Karafka
+  module Pro
+    module Processing
+      module ParallelSegments
+        # Module for filters injected into the processing pipeline of each of the topics used
+        # within the parallel segmented consumer groups
+        module Filters
+          # Base class for filters for parallel segments that deal with different feature scenarios
+          class Base < Processing::Filters::Base
+            # @param segment_id [Integer] numeric id of the parallel segment group to use with the
+            #   partitioner and reducer for segment matching comparison
+            # @param partitioner [Proc]
+            # @param reducer [Proc]
+            def initialize(segment_id:, partitioner:, reducer:)
+              super()
+
+              @segment_id = segment_id
+              @partitioner = partitioner
+              @reducer = reducer
+            end
+
+            private
+
+            # @param message [Karafka::Messages::Message] received message
+            # @return [String, Numeric] segment assignment key
+            def partition(message)
+              @partitioner.call(message)
+            rescue StandardError => e
+              # This should not happen. If you are seeing this it means your partitioner code
+              # failed and raised an error. We highly recommend mitigating partitioner level errors
+              # on the user side because this type of collapse should be considered a last resort
+              Karafka.monitor.instrument(
+                'error.occurred',
+                caller: self,
+                error: e,
+                message: message,
+                type: 'parallel_segments.partitioner.error'
+              )
+
+              :failure
+            end
+
+            # @param message_segment_key [String, Numeric] segment key to pass to the reducer
+            # @return [Integer] segment assignment of a given message
+            def reduce(message_segment_key)
+              # Assign to segment 0 always in case of failures in partitioner
+              # This is a fail-safe
+              return 0 if message_segment_key == :failure
+
+              @reducer.call(message_segment_key)
+            rescue StandardError => e
+              # @see `#partition` method error handling doc
+              Karafka.monitor.instrument(
+                'error.occurred',
+                caller: self,
+                error: e,
+                message_segment_key: message_segment_key,
+                type: 'parallel_segments.reducer.error'
+              )
+
+              0
+            end
+          end
+        end
+      end
+    end
+  end
+end
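Since partitioner and reducer crashes collapse into the segment-0 fail-safe rather than raising, the only visible signal is the `error.occurred` instrumentation with the dedicated types above. A small sketch for observing them; `Reporter` is a placeholder:

```ruby
Karafka.monitor.subscribe('error.occurred') do |event|
  case event[:type]
  when 'parallel_segments.partitioner.error', 'parallel_segments.reducer.error'
    # The affected message was routed to segment 0; surface the root cause
    Reporter.notify(event[:error])
  end
end
```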
data/lib/karafka/pro/processing/parallel_segments/filters/default.rb
ADDED
@@ -0,0 +1,85 @@
+# frozen_string_literal: true
+
+# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+# See LICENSE for details.
+
+module Karafka
+  module Pro
+    module Processing
+      # Processing components namespace for parallel segments feature
+      module ParallelSegments
+        module Filters
+          # Filter used for handling parallel segments with automatic offset management. Handles
+          # message distribution and ensures proper offset management when messages are filtered
+          # out during the distribution process.
+          #
+          # When operating in automatic offset management mode, this filter takes care of marking
+          # offsets of messages that were filtered out during the distribution process to maintain
+          # proper offset progression.
+          #
+          # @note This is the default filter that should be used when manual offset management
+          #   is not enabled. For manual offset management scenarios use the Mom filter instead.
+          class Default < Base
+            # Applies the filter to the batch of messages
+            # It removes messages that don't belong to the current parallel segment group
+            # based on the partitioner and reducer logic
+            #
+            # @param messages [Array<Karafka::Messages::Message>] messages batch that we want to
+            #   filter
+            def apply!(messages)
+              @applied = false
+              @all_filtered = false
+              @cursor = messages.first
+
+              # Keep track of how many messages we had initially
+              initial_size = messages.size
+
+              # Filter out messages that don't match our segment group
+              messages.delete_if do |message|
+                message_segment_key = partition(message)
+
+                # Use the reducer to get the target group for this message
+                target_segment = reduce(message_segment_key)
+
+                # Remove the message if it doesn't belong to our group
+                remove = target_segment != @segment_id
+
+                if remove
+                  @cursor = message
+                  @applied = true
+                end
+
+                remove
+              end
+
+              # If all messages were filtered out, we want to mark them as consumed
+              @all_filtered = messages.empty? && initial_size.positive?
+            end
+
+            # @return [Boolean] true if any messages were filtered out
+            def applied?
+              @applied
+            end
+
+            # @return [Boolean] true if we should mark as consumed (when all were filtered)
+            def mark_as_consumed?
+              @all_filtered
+            end
+
+            # @return [nil] Since we do not timeout ever in this filter, we should not return
+            #   any value for it.
+            def timeout
+              nil
+            end
+
+            # Only return cursor if we wanted to mark as consumed in case all was filtered.
+            # Otherwise it could interfere with other filters
+            def cursor
+              @all_filtered ? @cursor : nil
+            end
+          end
+        end
+      end
+    end
+  end
+end
data/lib/karafka/pro/processing/parallel_segments/filters/mom.rb
ADDED
@@ -0,0 +1,66 @@
+# frozen_string_literal: true
+
+# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+# See LICENSE for details.
+
+module Karafka
+  module Pro
+    module Processing
+      module ParallelSegments
+        module Filters
+          # Filter used for handling parallel segments when manual offset management (mom) is
+          # enabled. Provides message distribution without any post-filtering offset state
+          # management as it is fully user-based.
+          #
+          # Since with manual offset management we need to ensure that offsets are never marked
+          # even in cases where all data in a batch is filtered out.
+          #
+          # This separation allows for cleaner implementation and easier debugging of each flow.
+          #
+          # @note This filter should be used only when manual offset management is enabled.
+          #   For automatic offset management scenarios use the regular filter instead.
+          class Mom < Base
+            # Applies the filter to the batch of messages
+            # It removes messages that don't belong to the current parallel segment group
+            # based on the partitioner and reducer logic without any offset marking
+            #
+            # @param messages [Array<Karafka::Messages::Message>] messages batch that we want to
+            #   filter
+            def apply!(messages)
+              @applied = false
+
+              # Filter out messages that don't match our segment group
+              messages.delete_if do |message|
+                message_segment_key = partition(message)
+                # Use the reducer to get the target group for this message
+                target_segment = reduce(message_segment_key)
+                # Remove the message if it doesn't belong to our segment
+                remove = target_segment != @segment_id
+
+                @applied = true if remove
+
+                remove
+              end
+            end
+
+            # @return [Boolean] true if any messages were filtered out
+            def applied?
+              @applied
+            end
+
+            # @return [Boolean] false, as mom mode never marks as consumed automatically
+            def mark_as_consumed?
+              false
+            end
+
+            # @return [nil] Since we do not timeout ever in this filter, we should not return
+            #   any value for it.
+            def timeout
+              nil
+            end
+          end
+        end
+      end
+    end
+  end
+end
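These filters are injected per segment by the routing layer (the `routing/features/parallel_segments/*` files in the listing above). A hedged routing sketch; the option names (`count`, `partitioner`, `reducer`) mirror the filter arguments but are assumptions about the DSL:

```ruby
class KarafkaApp < Karafka::App
  routes.draw do
    consumer_group :events_group do
      # Two parallel copies of this group; each keeps only the messages its
      # reducer maps to its own segment id (0 or 1)
      parallel_segments(
        count: 2,
        partitioner: ->(message) { message.headers['user_id'] },
        reducer: ->(key) { key.to_s.sum % 2 }
      )

      topic :events do
        consumer EventsConsumer
      end
    end
  end
end
```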
data/lib/karafka/pro/processing/partitioner.rb
CHANGED
@@ -38,19 +38,7 @@ module Karafka
           # reduce the whole set into one partition and emit error. This should still allow for
           # user flow but should mitigate damages by not virtualizing
           begin
-            groupings = messages
-              # We need to reduce it to the max concurrency, so the group_id is not a direct
-              # effect of the end user action. Otherwise the persistence layer for consumers
-              # would cache it forever and it would cause memory leaks
-              #
-              # This also needs to be consistent because the aggregation here needs to warrant,
-              # that the same partitioned message will always be assigned to the same virtual
-              # partition. Otherwise in case of a window aggregation with VP spanning across
-              # several polls, the data could not be complete.
-              vps.reducer.call(
-                vps.partitioner.call(msg)
-              )
-            end
+            groupings = vps.distributor.call(messages)
           rescue StandardError => e
             # This should not happen. If you are seeing this it means your partitioner code
             # failed and raised an error. We highly recommend mitigating partitioner level errors
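The inline reduce-over-partitioner grouping moves behind `vps.distributor`, matching the new `virtual_partitions/distributors/{balanced,consistent}` classes in the file list. A hedged routing sketch; the `distribution` option name is an assumption:

```ruby
topic :events do
  consumer EventsConsumer

  virtual_partitions(
    partitioner: ->(message) { message.headers['order_id'] },
    # :balanced spreads work more evenly across workers; :consistent keeps the
    # stable key-to-virtual-partition mapping described in the removed comment
    distribution: :balanced
  )
end
```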
data/lib/karafka/pro/processing/piping/consumer.rb
CHANGED
@@ -20,16 +20,16 @@ module Karafka
 
          # Pipes given message to the provided topic with expected details. Useful for
          # pass-through operations where deserialization is not needed. Upon usage it will include
-          # all the
+          # all the source headers + meta headers about the source of message.
          #
          # @param topic [String, Symbol] where we want to send the message
-          # @param message [Karafka::Messages::Message]
+          # @param message [Karafka::Messages::Message] source message to pipe
          #
          # @note It will NOT deserialize the payload so it is fast
          #
          # @note We assume that there can be different number of partitions in the target topic,
-          #   this is why we use `key` based on the
-          #   This will not utilize partitions beyond the number of partitions of
+          #   this is why we use `key` based on the source topic key and not the partition id.
+          #   This will not utilize partitions beyond the number of partitions of source topic,
          #   but will accommodate for topics with less partitions.
          def pipe_async(topic:, message:)
            produce_async(
@@ -40,7 +40,7 @@ module Karafka
          # Sync version of pipe for one message
          #
          # @param topic [String, Symbol] where we want to send the message
-          # @param message [Karafka::Messages::Message]
+          # @param message [Karafka::Messages::Message] source message to pipe
          # @see [#pipe_async]
          def pipe_sync(topic:, message:)
            produce_sync(
@@ -51,7 +51,7 @@ module Karafka
          # Async multi-message pipe
          #
          # @param topic [String, Symbol] where we want to send the message
-          # @param messages [Array<Karafka::Messages::Message>]
+          # @param messages [Array<Karafka::Messages::Message>] source messages to pipe
          #
          # @note If transactional producer in use and dispatch is not wrapped with a transaction,
          #   it will automatically wrap the dispatch with a transaction
@@ -66,7 +66,7 @@ module Karafka
          # Sync multi-message pipe
          #
          # @param topic [String, Symbol] where we want to send the message
-          # @param messages [Array<Karafka::Messages::Message>]
+          # @param messages [Array<Karafka::Messages::Message>] source messages to pipe
          #
          # @note If transactional producer in use and dispatch is not wrapped with a transaction,
          #   it will automatically wrap the dispatch with a transaction
@@ -81,7 +81,7 @@ module Karafka
          private
 
          # @param topic [String, Symbol] where we want to send the message
-          # @param message [Karafka::Messages::Message]
+          # @param message [Karafka::Messages::Message] source message to pipe
          # @return [Hash] hash with message to pipe.
          #
          # @note If you need to alter this, please define the `#enhance_pipe_message` method
@@ -90,17 +90,17 @@ module Karafka
              topic: topic,
              payload: message.raw_payload,
              headers: message.raw_headers.merge(
-                '
-                '
-                '
-                '
+                'source_topic' => message.topic,
+                'source_partition' => message.partition.to_s,
+                'source_offset' => message.offset.to_s,
+                'source_consumer_group' => self.topic.consumer_group.id
              )
            }
 
            # Use a key only if key was provided
            if message.raw_key
              pipe_message[:key] = message.raw_key
-            # Otherwise pipe creating a key that will assign it based on the
+            # Otherwise pipe creating a key that will assign it based on the source partition
            # number
            else
              pipe_message[:key] = message.partition.to_s
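Piping in use, per the documented signatures above: the raw payload is forwarded without deserialization and the `source_*` headers are appended automatically. Topic and class names are illustrative:

```ruby
class ForwardingConsumer < Karafka::BaseConsumer
  def consume
    messages.each do |message|
      # Fast pass-through; the key defaults to the source partition when absent
      pipe_async(topic: 'events_mirror', message: message)
    end
  end
end
```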
@@ -40,7 +40,7 @@ module Karafka
            mark_as_consumed(last_group_message) unless revoked?
            # no need to check for manual seek because AJ consumer is internal and
            # fully controlled by us
-            seek(seek_offset, false) unless revoked?
+            seek(seek_offset, false, reset_offset: false) unless revoked?
 
            resume
          else