karafka 2.4.18 → 2.5.0.beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/CODEOWNERS +3 -0
- data/.github/workflows/ci.yml +58 -14
- data/.github/workflows/push.yml +36 -0
- data/.github/workflows/verify-action-pins.yml +16 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +60 -0
- data/Gemfile +2 -2
- data/Gemfile.lock +69 -50
- data/LICENSE-COMM +2 -2
- data/README.md +1 -1
- data/Rakefile +4 -0
- data/bin/clean_kafka +43 -0
- data/bin/integrations +19 -6
- data/bin/rspecs +15 -3
- data/bin/verify_kafka_warnings +35 -0
- data/bin/verify_topics_naming +27 -0
- data/config/locales/errors.yml +3 -0
- data/config/locales/pro_errors.yml +13 -2
- data/docker-compose.yml +1 -1
- data/examples/payloads/json/enrollment_event.json +579 -0
- data/examples/payloads/json/ingestion_event.json +30 -0
- data/examples/payloads/json/transaction_event.json +17 -0
- data/examples/payloads/json/user_event.json +11 -0
- data/karafka.gemspec +3 -8
- data/lib/karafka/active_job/current_attributes.rb +1 -1
- data/lib/karafka/admin/acl.rb +5 -1
- data/lib/karafka/admin/configs.rb +5 -1
- data/lib/karafka/admin.rb +69 -34
- data/lib/karafka/base_consumer.rb +17 -8
- data/lib/karafka/cli/base.rb +8 -2
- data/lib/karafka/cli/topics/align.rb +7 -4
- data/lib/karafka/cli/topics/base.rb +17 -0
- data/lib/karafka/cli/topics/create.rb +9 -7
- data/lib/karafka/cli/topics/delete.rb +4 -2
- data/lib/karafka/cli/topics/help.rb +39 -0
- data/lib/karafka/cli/topics/repartition.rb +4 -2
- data/lib/karafka/cli/topics.rb +10 -3
- data/lib/karafka/cli.rb +2 -0
- data/lib/karafka/connection/client.rb +30 -9
- data/lib/karafka/connection/listener.rb +24 -12
- data/lib/karafka/connection/messages_buffer.rb +1 -1
- data/lib/karafka/connection/proxy.rb +3 -0
- data/lib/karafka/constraints.rb +3 -3
- data/lib/karafka/contracts/config.rb +3 -0
- data/lib/karafka/contracts/topic.rb +1 -1
- data/lib/karafka/errors.rb +46 -2
- data/lib/karafka/helpers/async.rb +3 -1
- data/lib/karafka/instrumentation/callbacks/rebalance.rb +5 -1
- data/lib/karafka/instrumentation/logger_listener.rb +86 -23
- data/lib/karafka/instrumentation/proctitle_listener.rb +5 -1
- data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +2 -2
- data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
- data/lib/karafka/pro/cleaner.rb +8 -0
- data/lib/karafka/pro/cli/parallel_segments/base.rb +89 -0
- data/lib/karafka/pro/cli/parallel_segments/collapse.rb +164 -0
- data/lib/karafka/pro/cli/parallel_segments/distribute.rb +164 -0
- data/lib/karafka/pro/cli/parallel_segments.rb +60 -0
- data/lib/karafka/pro/connection/manager.rb +5 -8
- data/lib/karafka/pro/encryption.rb +8 -0
- data/lib/karafka/pro/instrumentation/performance_tracker.rb +1 -1
- data/lib/karafka/pro/iterator/expander.rb +5 -3
- data/lib/karafka/pro/iterator/tpl_builder.rb +23 -0
- data/lib/karafka/pro/loader.rb +10 -0
- data/lib/karafka/pro/processing/coordinator.rb +4 -1
- data/lib/karafka/pro/processing/coordinators/errors_tracker.rb +27 -3
- data/lib/karafka/pro/processing/coordinators/filters_applier.rb +11 -0
- data/lib/karafka/pro/processing/filters/base.rb +10 -2
- data/lib/karafka/pro/processing/filters/expirer.rb +5 -0
- data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb +2 -2
- data/lib/karafka/pro/processing/filters/virtual_limiter.rb +5 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/base.rb +73 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/default.rb +85 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/mom.rb +66 -0
- data/lib/karafka/pro/processing/partitioner.rb +1 -13
- data/lib/karafka/pro/processing/piping/consumer.rb +13 -13
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/default.rb +36 -8
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +14 -10
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +3 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/ftr/default.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +4 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
- data/lib/karafka/pro/processing/virtual_partitions/distributors/balanced.rb +50 -0
- data/lib/karafka/pro/processing/virtual_partitions/distributors/base.rb +29 -0
- data/lib/karafka/pro/processing/virtual_partitions/distributors/consistent.rb +27 -0
- data/lib/karafka/pro/recurring_tasks/contracts/config.rb +8 -4
- data/lib/karafka/pro/recurring_tasks/dispatcher.rb +3 -3
- data/lib/karafka/pro/recurring_tasks/setup/config.rb +7 -2
- data/lib/karafka/pro/recurring_tasks.rb +13 -0
- data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +1 -1
- data/lib/karafka/pro/routing/features/multiplexing/config.rb +1 -0
- data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +17 -0
- data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +5 -2
- data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +8 -1
- data/lib/karafka/pro/routing/features/parallel_segments/builder.rb +47 -0
- data/lib/karafka/pro/routing/features/parallel_segments/config.rb +27 -0
- data/lib/karafka/pro/routing/features/parallel_segments/consumer_group.rb +83 -0
- data/lib/karafka/pro/routing/features/parallel_segments/contracts/consumer_group.rb +49 -0
- data/lib/karafka/pro/routing/features/parallel_segments/topic.rb +43 -0
- data/lib/karafka/pro/routing/features/parallel_segments.rb +24 -0
- data/lib/karafka/pro/routing/features/patterns/pattern.rb +1 -1
- data/lib/karafka/pro/routing/features/recurring_tasks/builder.rb +2 -2
- data/lib/karafka/pro/routing/features/scheduled_messages/builder.rb +10 -6
- data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +20 -2
- data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +8 -2
- data/lib/karafka/pro/scheduled_messages/consumer.rb +19 -21
- data/lib/karafka/pro/scheduled_messages/daily_buffer.rb +9 -6
- data/lib/karafka/pro/scheduled_messages/deserializers/headers.rb +7 -1
- data/lib/karafka/pro/scheduled_messages/max_epoch.rb +15 -6
- data/lib/karafka/pro/scheduled_messages.rb +13 -0
- data/lib/karafka/processing/coordinators_buffer.rb +1 -0
- data/lib/karafka/processing/strategies/default.rb +4 -4
- data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
- data/lib/karafka/routing/subscription_group.rb +1 -1
- data/lib/karafka/runner.rb +7 -1
- data/lib/karafka/server.rb +19 -19
- data/lib/karafka/setup/attributes_map.rb +2 -0
- data/lib/karafka/setup/config.rb +22 -1
- data/lib/karafka/setup/defaults_injector.rb +26 -1
- data/lib/karafka/status.rb +6 -1
- data/lib/karafka/swarm/node.rb +31 -0
- data/lib/karafka/swarm/supervisor.rb +4 -0
- data/lib/karafka/templates/karafka.rb.erb +14 -1
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +17 -9
- data/renovate.json +14 -2
- metadata +40 -40
- checksums.yaml.gz.sig +0 -0
- data/certs/cert.pem +0 -26
- data.tar.gz.sig +0 -0
- metadata.gz.sig +0 -0

data/lib/karafka/pro/cli/parallel_segments/collapse.rb
ADDED
@@ -0,0 +1,164 @@
+# frozen_string_literal: true
+
+# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+# See LICENSE for details.
+
+module Karafka
+  module Pro
+    module Cli
+      class ParallelSegments < Karafka::Cli::Base
+        # Takes the committed offset of each parallel segment for each topic and records
+        # them back onto the segment origin consumer group. Without `--force` it will raise an
+        # error on conflicts. With `--force` it will take the lowest possible offset for each
+        # topic partition as the baseline.
+        #
+        # @note Running this can cause you some double processing if the parallel segments final
+        #   offsets are not aligned.
+        #
+        # @note This will **not** remove the parallel segments consumer groups. Please use the
+        #   Admin API if you want them to be removed.
+        class Collapse < Base
+          # Runs the collapse operation
+          def call
+            puts 'Starting parallel segments collapse...'
+
+            segments_count = applicable_groups.size
+
+            if segments_count.zero?
+              puts "#{red('No')} consumer groups with parallel segments configuration found"
+
+              return
+            end
+
+            puts(
+              "Found #{green(segments_count)} consumer groups with parallel segments configuration"
+            )
+
+            collapses = []
+
+            applicable_groups.each do |segment_origin, segments|
+              puts
+              puts "Collecting group #{yellow(segment_origin)} details..."
+              offsets = collect_offsets(segment_origin, segments)
+
+              unless options.key?(:force)
+                puts
+                puts "Validating offsets positions for #{yellow(segment_origin)} consumer group..."
+                validate!(offsets, segment_origin)
+              end
+
+              puts
+              puts "Computing collapsed offsets for #{yellow(segment_origin)} consumer group..."
+              collapses << collapse(offsets, segments)
+            end
+
+            collapses.each do |collapse|
+              apply(collapse)
+            end
+
+            puts
+            puts "Collapse completed #{green('successfully')}!"
+          end
+
+          private
+
+          # Computes the lowest possible offset available for each topic partition and sets it
+          # on the segment origin consumer group.
+          #
+          # @param offsets [Hash]
+          # @param segments [Array<Karafka::Routing::ConsumerGroup>]
+          # @note This code does **not** apply the offsets, just computes their positions
+          def collapse(offsets, segments)
+            collapse = Hash.new { |h, k| h[k] = {} }
+            segments_names = segments.map(&:name)
+
+            offsets.each do |cg_name, topics|
+              next unless segments_names.include?(cg_name)
+
+              topics.each do |topic_name, partitions|
+                partitions.each do |partition_id, offset|
+                  current_lowest_offset = collapse[topic_name][partition_id]
+
+                  next if current_lowest_offset && current_lowest_offset < offset
+
+                  collapse[topic_name][partition_id] = offset
+                end
+              end
+            end
+
+            {
+              collapse: collapse,
+              segment_origin: segments.first.segment_origin
+            }
+          end
+
+          # In order to collapse the offsets of parallel segments back to one, we need to know
+          # to what offsets to collapse. The issue (that we solve picking lowest when forced)
+          # arises when there are more offsets that are not even in parallel segments for one
+          # topic partition. We should let user know about this if this happens so he does not
+          # end up with double-processing.
+          #
+          # @param offsets [Hash]
+          # @param segment_origin [String]
+          def validate!(offsets, segment_origin)
+            collapse = Hash.new { |h, k| h[k] = {} }
+
+            offsets.each do |cg_name, topics|
+              next if cg_name == segment_origin
+
+              topics.each do |topic_name, partitions|
+                partitions.each do |partition_id, offset|
+                  collapse[topic_name][partition_id] ||= Set.new
+                  collapse[topic_name][partition_id] << offset
+                end
+              end
+            end
+
+            inconclusive = false
+
+            collapse.each do |topic_name, partitions|
+              partitions.each do |partition_id, parallel_offsets|
+                next if parallel_offsets.size <= 1
+
+                inconclusive = true
+
+                puts(
+                  " Inconclusive offsets for #{red(topic_name)}##{red(partition_id)}:" \
+                  " #{parallel_offsets.to_a.join(', ')}"
+                )
+              end
+            end
+
+            return unless inconclusive
+
+            raise(
+              ::Karafka::Errors::CommandValidationError,
+              "Parallel segments for #{red(segment_origin)} have #{red('inconclusive')} offsets"
+            )
+          end
+
+          # Applies the collapsed lowest offsets onto the segment origin consumer group
+          #
+          # @param collapse [Hash]
+          def apply(collapse)
+            segment_origin = collapse[:segment_origin]
+            alignments = collapse[:collapse]
+
+            puts
+            puts "Adjusting offsets of segment origin consumer group: #{green(segment_origin)}"
+
+            alignments.each do |topic_name, partitions|
+              puts " Topic #{green(topic_name)}:"
+
+              partitions.each do |partition_id, offset|
+                puts " Partition #{green(partition_id)}: starting offset #{green(offset)}"
+              end
+            end
+
+            Karafka::Admin.seek_consumer_group(segment_origin, alignments)
+          end
+        end
+      end
+    end
+  end
+end
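
Both parallel segments subcommands ultimately hand their per-topic alignments to the new `Karafka::Admin.seek_consumer_group` API used above. A minimal sketch of that call as built by `Collapse#apply`; the group and topic names are illustrative:

    # The alignments hash mirrors what the command computes:
    # { topic_name => { partition_id => target_offset } }
    Karafka::Admin.seek_consumer_group(
      'orders_group',
      { 'orders' => { 0 => 125, 1 => 130 } }
    )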

data/lib/karafka/pro/cli/parallel_segments/distribute.rb
ADDED
@@ -0,0 +1,164 @@
+# frozen_string_literal: true
+
+# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+# See LICENSE for details.
+
+module Karafka
+  module Pro
+    module Cli
+      class ParallelSegments < Karafka::Cli::Base
+        # Command that makes it easier for users to migrate from regular consumer groups to
+        # the parallel segments consumers groups by automatically distributing offsets based on
+        # the used "normal" consumer group.
+        #
+        # Takes the segments origin consumer group offsets for a given set of topics and
+        # distributes those offsets onto the parallel segments consumer groups, so they can pick
+        # up where the origin group left.
+        #
+        # To make sure users do not accidentally "re-distribute" their offsets from the original
+        # consumer group after the parallel consumer groups had offsets assigned and started to
+        # work, we check if the parallel groups have any offsets, if so unless forced we halt.
+        #
+        # @note This command does not remove the original consumer group from Kafka. We keep it
+        #   just as a backup. User can remove it himself.
+        #
+        # @note Kafka has no atomic operations this is why we first collect all the data and run
+        #   needed validations before applying offsets.
+        class Distribute < Base
+          # Runs the distribution process
+          def call
+            puts 'Starting parallel segments distribution...'
+
+            segments_count = applicable_groups.size
+
+            if segments_count.zero?
+              puts "#{red('No')} consumer groups with parallel segments configuration found"
+
+              return
+            end
+
+            puts(
+              "Found #{green(segments_count)} consumer groups with parallel segments configuration"
+            )
+
+            distributions = []
+
+            applicable_groups.each do |segment_origin, segments|
+              puts
+              puts "Collecting group #{yellow(segment_origin)} details..."
+              offsets = collect_offsets(segment_origin, segments)
+
+              unless options.key?(:force)
+                puts "Validating group #{yellow(segment_origin)} parallel segments..."
+                validate!(offsets, segments)
+              end
+
+              puts "Distributing group #{yellow(segment_origin)} offsets..."
+              distributions += distribute(offsets, segments)
+            end
+
+            distributions.each do |distribution|
+              apply(distribution)
+            end
+
+            puts
+            puts "Distribution completed #{green('successfully')}!"
+          end
+
+          private
+
+          # Validates the current state of topics offsets assignments.
+          # We want to make sure, that users do not run distribution twice, especially for a
+          # parallel segments consumers group set that was already actively consumed. This is why
+          # we check if there was any offsets already present in the parallel segments consumer
+          # groups and if so, we raise an error. This can be disabled with `--force`.
+          #
+          # It prevents users from overwriting the already set segments distribution.
+          # Adding new topics to the same parallel segments consumer group does not require us to
+          # run this at all and on top of that users can always use `--consumer_groups` flag to
+          # limit the cgs that we will be operating here
+          #
+          # @param offsets [Hash]
+          # @param segments [Array<Karafka::Routing::ConsumerGroup>]
+          def validate!(offsets, segments)
+            segments_names = segments.map(&:name)
+
+            offsets.each do |cg_name, topics|
+              next unless segments_names.include?(cg_name)
+
+              topics.each do |topic_name, partitions|
+                partitions.each do |partition_id, offset|
+                  next unless offset.to_i.positive?
+
+                  raise(
+                    ::Karafka::Errors::CommandValidationError,
+                    "Parallel segment #{red(cg_name)} already has offset #{red(offset)}" \
+                    " set for #{red("#{topic_name}##{partition_id}")}"
+                  )
+                end
+              end
+            end
+          end
+
+          # Computes the offsets distribution for all the segments consumer groups so when user
+          # migrates from one CG to parallel segments, those segments know where to start consuming
+          # the data.
+          #
+          # @param offsets [Hash]
+          # @param segments [Array<Karafka::Routing::ConsumerGroup>]
+          # @note This code does **not** apply the offsets, just computes their positions
+          def distribute(offsets, segments)
+            distributions = []
+            segments_names = segments.map(&:name)
+
+            offsets.each do |cg_name, topics|
+              next if segments_names.include?(cg_name)
+
+              distribution = {}
+
+              topics.each do |topic_name, partitions|
+                partitions.each do |partition_id, offset|
+                  distribution[topic_name] ||= {}
+                  distribution[topic_name][partition_id] = offset
+                end
+              end
+
+              next if distribution.empty?
+
+              segments_names.each do |segment_name|
+                distributions << {
+                  segment_name: segment_name,
+                  distribution: distribution
+                }
+              end
+            end
+
+            distributions
+          end
+
+          # Takes the details of the distribution of offsets for a given segment and adjust the
+          # starting offsets for all the consumer group topics based on the distribution.
+          #
+          # @param distribution [Hash]
+          def apply(distribution)
+            segment_name = distribution[:segment_name]
+            alignments = distribution[:distribution]
+
+            puts
+            puts "Adjusting offsets of parallel segments consumer group: #{green(segment_name)}"
+
+            alignments.each do |topic_name, partitions|
+              puts " Topic #{green(topic_name)}:"
+
+              partitions.each do |partition_id, offset|
+                puts " Partition #{green(partition_id)}: starting offset #{green(offset)}"
+              end
+            end
+
+            Karafka::Admin.seek_consumer_group(segment_name, alignments)
+          end
+        end
+      end
+    end
+  end
+end

data/lib/karafka/pro/cli/parallel_segments.rb
ADDED
@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+
+# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+# See LICENSE for details.
+
+module Karafka
+  module Pro
+    # Pro related CLI commands
+    module Cli
+      # CLI entry-point for parallel segments management commands
+      class ParallelSegments < Karafka::Cli::Base
+        include Helpers::Colorize
+        include Helpers::ConfigImporter.new(
+          kafka_config: %i[kafka]
+        )
+
+        desc 'Allows for parallel segments management'
+
+        option(
+          :groups,
+          'Names of consumer groups on which we want to run the command. All if not provided',
+          Array,
+          %w[
+            --groups
+            --consumer_groups
+          ]
+        )
+
+        # Some operations may not be allowed to run again after data is set in certain ways.
+        # For example if a distribution command is invoked when the parallel group segment
+        # consumer groups already have offsets set, we will fail unless user wants to force it.
+        # This prevents users from accidentally running the command in such ways that would cause
+        # their existing distributed offsets to be reset.
+        option(
+          :force,
+          'Should an operation on the parallel segments consumer group be forced',
+          TrueClass,
+          %w[
+            --force
+          ]
+        )
+
+        # @param action [String] action we want to take
+        def call(action = 'distribute')
+          case action
+          when 'distribute'
+            Distribute.new(options).call
+          when 'collapse'
+            Collapse.new(options).call
+          when 'reset'
+            Collapse.new(options).call
+            Distribute.new(options).call
+          else
+            raise ::ArgumentError, "Invalid topics action: #{action}"
+          end
+        end
+      end
+    end
+  end
+end
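
For reference, a hedged sketch of reaching these commands programmatically; normally they are invoked through the `karafka parallel_segments` CLI with the `--groups`/`--consumer_groups` and `--force` flags declared above, and the group name here is illustrative:

    # Option keys mirror the declared CLI flags; including the :force key skips
    # the safety validation, so it is omitted here.
    options = { groups: %w[orders_group] }

    # Copy the origin consumer group offsets onto the parallel segment groups:
    Karafka::Pro::Cli::ParallelSegments::Distribute.new(options).call

    # Fold the parallel segment offsets back onto the origin group:
    Karafka::Pro::Cli::ParallelSegments::Collapse.new(options).call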

data/lib/karafka/pro/connection/manager.rb
CHANGED
@@ -19,14 +19,9 @@ module Karafka
      class Manager < Karafka::Connection::Manager
        include Core::Helpers::Time

-        #
-
-        # @param scale_delay [Integer] How long should we wait before making any changes. Any
-        #   change related to this consumer group will postpone the scaling operations. This is
-        #   done that way to prevent too many friction in the cluster. It is 1 minute by default
-        def initialize(scale_delay = 60 * 1_000)
+        # Creates new manager instance
+        def initialize
          super()
-          @scale_delay = scale_delay
          @mutex = Mutex.new
          @changes = Hash.new do |h, k|
            h[k] = {
@@ -201,7 +196,7 @@ module Karafka

            next unless multiplexing.active?
            next unless multiplexing.dynamic?
-            # If we cannot
+            # If we cannot upscale, do not
            next if sg_listeners.count(&:active?) >= multiplexing.max

            sg_listeners.each do |sg_listener|
@@ -234,6 +229,8 @@ module Karafka
        # are also stable. This is a strong indicator that no rebalances or other operations are
        # happening at a given moment.
        def stable?(sg_listeners)
+          @scale_delay ||= sg_listeners.first.subscription_group.multiplexing.scale_delay
+
          sg_listeners.all? do |sg_listener|
            # If a listener is not active, we do not take it into consideration when looking at
            # the stability data
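
The scale delay is no longer a constructor argument of the manager; `stable?` now lazily reads it from the subscription group's multiplexing settings. A hedged routing sketch of where that value would now live: the `scale_delay` keyword (milliseconds) is an assumption based on this diff, `min`/`max`/`boot` are the pre-existing multiplexing options, and the class and topic names are illustrative:

    class KarafkaApp < Karafka::App
      routes.draw do
        subscription_group :payments do
          # Dynamic multiplexing; scaling decisions wait scale_delay ms after changes
          multiplexing(min: 1, max: 3, boot: 2, scale_delay: 60_000)

          topic :transactions do
            consumer TransactionsConsumer
          end
        end
      end
    end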

data/lib/karafka/pro/encryption.rb
CHANGED
@@ -33,6 +33,14 @@ module Karafka
          # Encryption for WaterDrop
          config.producer.middleware.append(Messages::Middleware.new)
        end
+
+        # This feature does not need any changes post-fork
+        #
+        # @param _config [Karafka::Core::Configurable::Node]
+        # @param _pre_fork_producer [WaterDrop::Producer]
+        def post_fork(_config, _pre_fork_producer)
+          true
+        end
      end
    end
  end

data/lib/karafka/pro/iterator/expander.rb
CHANGED
@@ -21,8 +21,10 @@ module Karafka
      # - { 'topic1' => 100 } - means we run all partitions from the offset 100
      # - { 'topic1' => Time.now - 60 } - we run all partitions from the message from 60s ago
      # - { 'topic1' => { 1 => Time.now - 60 } } - partition1 from message 60s ago
-      # - { 'topic1' => { 1 => true } } - will pick first offset
-      # - { 'topic1' => true } - will pick first offset
+      # - { 'topic1' => { 1 => true } } - will pick first offset on this CG for partition 1
+      # - { 'topic1' => true } - will pick first offset for all partitions
+      # - { 'topic1' => :earliest } - will pick earliest offset for all partitions
+      # - { 'topic1' => :latest } - will pick latest (high-watermark) for all partitions
      class Expander
        # Expands topics to which we want to subscribe with partitions information in case this
        # info is not provided.
@@ -80,7 +82,7 @@ module Karafka
            .find { |topic| topic.fetch(:topic_name) == name }
            .tap { |topic| topic || raise(Errors::TopicNotFoundError, name) }
            .fetch(:partitions)
-            .
+            .size
        end
      end
    end

data/lib/karafka/pro/iterator/tpl_builder.rb
CHANGED
@@ -14,6 +14,11 @@ module Karafka
      # This builder resolves that and builds a tpl to which we can safely subscribe the way
      # we want it.
      class TplBuilder
+        # Supported named offset positions that we can reference via their name
+        SUPPORTED_NAMED_POSITIONS = %w[earliest latest].freeze
+
+        private_constant :SUPPORTED_NAMED_POSITIONS
+
        # @param consumer [::Rdkafka::Consumer] consumer instance needed to talk with Kafka
        # @param expanded_topics [Hash] hash with expanded and normalized topics data
        def initialize(consumer, expanded_topics)
@@ -28,6 +33,7 @@ module Karafka
          resolve_partitions_with_exact_offsets
          resolve_partitions_with_negative_offsets
          resolve_partitions_with_time_offsets
+          resolve_partitions_with_named_offsets
          resolve_partitions_with_cg_expectations

          # Final tpl with all the data
@@ -143,6 +149,23 @@ module Karafka
          end
        end

+        # If we get named offsets, we can just remap them to librdkafka special offset positions
+        def resolve_partitions_with_named_offsets
+          @expanded_topics.each do |name, partitions|
+            next unless partitions.is_a?(Hash)
+
+            partitions.each do |partition, offset|
+              # Skip offsets that do not match our named expectations
+              named_offset = offset.to_s
+
+              next unless SUPPORTED_NAMED_POSITIONS.include?(named_offset)
+
+              @mapped_topics[name][partition] = -1 if named_offset == 'latest'
+              @mapped_topics[name][partition] = -2 if named_offset == 'earliest'
+            end
+          end
+        end
+
        # Fetches last used offsets for those partitions for which we want to consume from last
        # moment where given consumer group has finished
        # This is indicated by given partition value being set to `true`.
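
The named positions simply map onto librdkafka's logical offsets (-2 for `earliest`, -1 for `latest`). A short Pro Iterator sketch using the new symbols; the topic names are illustrative:

    iterator = Karafka::Pro::Iterator.new(
      {
        'events' => :earliest,        # every partition from the beginning
        'metrics' => { 0 => :latest } # only partition 0, from the high-watermark
      }
    )

    iterator.each do |message|
      puts "#{message.topic}/#{message.partition}##{message.offset}"
    end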

data/lib/karafka/pro/loader.rb
CHANGED
@@ -60,6 +60,15 @@ module Karafka
          Processing::SubscriptionGroupsCoordinator.instance
        end

+        # Runs operations needed after fork in swarm for features that need it
+        #
+        # @param config [Karafka::Core::Configurable::Node]
+        # @param pre_fork_producer [WaterDrop::Producer] pre fork producer instance that may be
+        #   needed to be replaced with newly changed one post-fork.
+        def post_fork(config, pre_fork_producer)
+          features.each { |feature| feature.post_fork(config, pre_fork_producer) }
+        end
+
        private

        # @return [Array<Module>] extra non-routing related pro features and routing components
@@ -84,6 +93,7 @@ module Karafka
          icfg.connection.manager = Connection::Manager.new

          icfg.processing.coordinator_class = Processing::Coordinator
+          icfg.processing.errors_tracker_class = Processing::Coordinators::ErrorsTracker
          icfg.processing.partitioner_class = Processing::Partitioner
          icfg.processing.scheduler_class = Processing::Schedulers::Default
          icfg.processing.jobs_queue_class = Processing::JobsQueue
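
The loader now fans a `post_fork` call out to every Pro feature after a swarm node forks. A sketch of the contract a feature has to satisfy; `MyFeature` is hypothetical, and Encryption (above) fulfills it with a no-op:

    module MyFeature
      class << self
        # Invoked in each swarm child right after forking
        #
        # @param config [Karafka::Core::Configurable::Node]
        # @param pre_fork_producer [WaterDrop::Producer]
        def post_fork(config, pre_fork_producer)
          # Rebuild anything that must not be shared with the parent process,
          # e.g. swap the inherited producer reference for a fresh one.
          true
        end
      end
    end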

data/lib/karafka/pro/processing/coordinator.rb
CHANGED
@@ -10,6 +10,9 @@ module Karafka
      # within the same partition
      class Coordinator < ::Karafka::Processing::Coordinator
        extend Forwardable
+        include Helpers::ConfigImporter.new(
+          errors_tracker_class: %i[internal processing errors_tracker_class]
+        )

        def_delegators :@collapser, :collapsed?, :collapse_until!

@@ -20,7 +23,7 @@ module Karafka
          super

          @executed = []
-          @errors_tracker =
+          @errors_tracker = errors_tracker_class.new(topic, partition)
          @flow_mutex = Mutex.new
          # Lock for user code synchronization
          # We do not want to mix coordinator lock with the user lock not to create cases where
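
Because the tracker class is now resolved through internal configuration, it can in principle be swapped for a custom subclass. A hedged sketch; this touches internal config, so treat the exact setup path as an assumption:

    class AlertingErrorsTracker < Karafka::Pro::Processing::Coordinators::ErrorsTracker
      def <<(error)
        super
        # e.g. emit a metric tagged with topic, partition and error.class here
      end
    end

    Karafka::App.setup do |config|
      config.internal.processing.errors_tracker_class = AlertingErrorsTracker
    end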

data/lib/karafka/pro/processing/coordinators/errors_tracker.rb
CHANGED
@@ -13,25 +13,47 @@ module Karafka
        class ErrorsTracker
          include Enumerable

+          # @return [Karafka::Routing::Topic] topic of this error tracker
+          attr_reader :topic
+
+          # @return [Integer] partition of this error tracker
+          attr_reader :partition
+
+          # @return [Hash]
+          attr_reader :counts
+
          # Max errors we keep in memory.
          # We do not want to keep more because for DLQ-less this would cause memory-leaks.
+          # We do however count per class for granular error counting
          STORAGE_LIMIT = 100

          private_constant :STORAGE_LIMIT

-          def initialize
+          # @param topic [Karafka::Routing::Topic]
+          # @param partition [Integer]
+          # @param limit [Integer] max number of errors we want to keep for reference when
+          #   implementing custom error handling.
+          # @note `limit` does not apply to the counts. They will work beyond the number of errors
+          #   occurring
+          def initialize(topic, partition, limit: STORAGE_LIMIT)
            @errors = []
+            @counts = Hash.new { |hash, key| hash[key] = 0 }
+            @topic = topic
+            @partition = partition
+            @limit = limit
          end

          # Clears all the errors
          def clear
            @errors.clear
+            @counts.clear
          end

          # @param error [StandardError] adds the error to the tracker
          def <<(error)
-            @errors.shift if @errors.size >=
+            @errors.shift if @errors.size >= @limit
            @errors << error
+            @counts[error.class] += 1
          end

          # @return [Boolean] is the error tracker empty
@@ -41,7 +63,9 @@ module Karafka

          # @return [Integer] number of elements
          def size
-
+            # We use counts reference of all errors and not the `@errors` array because it allows
+            # us to go beyond the whole errors storage limit
+            @counts.values.sum
          end

          # @return [StandardError, nil] last error that occurred or nil if no errors
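
A small sketch of the semantics introduced above: the stored error objects are capped at `limit`, while `counts` keeps per-class totals beyond that cap. The first argument would normally be a `Karafka::Routing::Topic`; a symbol stands in for it here:

    tracker = Karafka::Pro::Processing::Coordinators::ErrorsTracker.new(:orders, 0, limit: 2)

    3.times { tracker << StandardError.new('boom') }
    tracker << ArgumentError.new('bad input')

    tracker.counts # => { StandardError => 3, ArgumentError => 1 }
    tracker.size   # => 4, derived from counts, not from the capped storage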

data/lib/karafka/pro/processing/coordinators/filters_applier.rb
CHANGED
@@ -98,6 +98,17 @@ module Karafka
            :mark_as_consumed
          end

+          # The first (lowest) message we want to mark as consumed in marking. By default it uses
+          # same position as cursor in case user wants to mark same message as consumed as the
+          # one on which cursor action is applied.
+          # @return [Karafka::Messages::Message, nil] cursor marking message or nil if none
+          # @note It should not return position in time format, only numerical offset
+          def marking_cursor
+            return nil unless active?
+
+            applied.map(&:marking_cursor).compact.min_by(&:offset)
+          end
+
          private

          # @return [Boolean] is filtering active

data/lib/karafka/pro/processing/filters/base.rb
CHANGED
@@ -42,9 +42,11 @@ module Karafka
            @applied
          end

-          # @return [Integer] default timeout for pausing (if applicable)
+          # @return [Integer, nil] default timeout for pausing (if applicable) or nil if not
+          # @note Please do not return `0` when your filter is not pausing as it may interact
+          #   with other filters that want to pause.
          def timeout
-
+            nil
          end

          # @return [Boolean] should we use the cursor value to mark as consumed. If any of the
@@ -58,6 +60,12 @@ module Karafka
          def marking_method
            :mark_as_consumed
          end
+
+          # @return [Karafka::Messages::Message, nil] cursor message for marking or nil if no
+          #   marking
+          def marking_cursor
+            cursor
+          end
        end
      end
    end
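
For custom filters built on this base class, the relevant changes are that a non-pausing filter should return nil (not 0) from `timeout`, and that `marking_cursor` can expose a different message for offset marking than the seek/pause cursor. A hedged sketch; the class name and the `apply!` body are illustrative:

    class SkipOldMessagesFilter < Karafka::Pro::Processing::Filters::Base
      # Removes messages older than one hour from the batch
      def apply!(messages)
        @applied = false
        @cursor = nil

        messages.delete_if do |message|
          too_old = message.timestamp < Time.now - 3600
          @applied ||= too_old
          @cursor = message if too_old
          too_old
        end
      end

      # Not a pausing filter, so nil instead of 0 to not interfere with filters
      # that do want to pause
      def timeout
        nil
      end

      # Mark from the same place as the cursor (this is also the default)
      def marking_cursor
        cursor
      end
    end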