karafka 2.4.18 → 2.5.0.beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/CODEOWNERS +3 -0
- data/.github/workflows/ci.yml +58 -14
- data/.github/workflows/push.yml +36 -0
- data/.github/workflows/verify-action-pins.yml +16 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +60 -0
- data/Gemfile +2 -2
- data/Gemfile.lock +69 -50
- data/LICENSE-COMM +2 -2
- data/README.md +1 -1
- data/Rakefile +4 -0
- data/bin/clean_kafka +43 -0
- data/bin/integrations +19 -6
- data/bin/rspecs +15 -3
- data/bin/verify_kafka_warnings +35 -0
- data/bin/verify_topics_naming +27 -0
- data/config/locales/errors.yml +3 -0
- data/config/locales/pro_errors.yml +13 -2
- data/docker-compose.yml +1 -1
- data/examples/payloads/json/enrollment_event.json +579 -0
- data/examples/payloads/json/ingestion_event.json +30 -0
- data/examples/payloads/json/transaction_event.json +17 -0
- data/examples/payloads/json/user_event.json +11 -0
- data/karafka.gemspec +3 -8
- data/lib/karafka/active_job/current_attributes.rb +1 -1
- data/lib/karafka/admin/acl.rb +5 -1
- data/lib/karafka/admin/configs.rb +5 -1
- data/lib/karafka/admin.rb +69 -34
- data/lib/karafka/base_consumer.rb +17 -8
- data/lib/karafka/cli/base.rb +8 -2
- data/lib/karafka/cli/topics/align.rb +7 -4
- data/lib/karafka/cli/topics/base.rb +17 -0
- data/lib/karafka/cli/topics/create.rb +9 -7
- data/lib/karafka/cli/topics/delete.rb +4 -2
- data/lib/karafka/cli/topics/help.rb +39 -0
- data/lib/karafka/cli/topics/repartition.rb +4 -2
- data/lib/karafka/cli/topics.rb +10 -3
- data/lib/karafka/cli.rb +2 -0
- data/lib/karafka/connection/client.rb +30 -9
- data/lib/karafka/connection/listener.rb +24 -12
- data/lib/karafka/connection/messages_buffer.rb +1 -1
- data/lib/karafka/connection/proxy.rb +3 -0
- data/lib/karafka/constraints.rb +3 -3
- data/lib/karafka/contracts/config.rb +3 -0
- data/lib/karafka/contracts/topic.rb +1 -1
- data/lib/karafka/errors.rb +46 -2
- data/lib/karafka/helpers/async.rb +3 -1
- data/lib/karafka/instrumentation/callbacks/rebalance.rb +5 -1
- data/lib/karafka/instrumentation/logger_listener.rb +86 -23
- data/lib/karafka/instrumentation/proctitle_listener.rb +5 -1
- data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +2 -2
- data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
- data/lib/karafka/pro/cleaner.rb +8 -0
- data/lib/karafka/pro/cli/parallel_segments/base.rb +89 -0
- data/lib/karafka/pro/cli/parallel_segments/collapse.rb +164 -0
- data/lib/karafka/pro/cli/parallel_segments/distribute.rb +164 -0
- data/lib/karafka/pro/cli/parallel_segments.rb +60 -0
- data/lib/karafka/pro/connection/manager.rb +5 -8
- data/lib/karafka/pro/encryption.rb +8 -0
- data/lib/karafka/pro/instrumentation/performance_tracker.rb +1 -1
- data/lib/karafka/pro/iterator/expander.rb +5 -3
- data/lib/karafka/pro/iterator/tpl_builder.rb +23 -0
- data/lib/karafka/pro/loader.rb +10 -0
- data/lib/karafka/pro/processing/coordinator.rb +4 -1
- data/lib/karafka/pro/processing/coordinators/errors_tracker.rb +27 -3
- data/lib/karafka/pro/processing/coordinators/filters_applier.rb +11 -0
- data/lib/karafka/pro/processing/filters/base.rb +10 -2
- data/lib/karafka/pro/processing/filters/expirer.rb +5 -0
- data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb +2 -2
- data/lib/karafka/pro/processing/filters/virtual_limiter.rb +5 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/base.rb +73 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/default.rb +85 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/mom.rb +66 -0
- data/lib/karafka/pro/processing/partitioner.rb +1 -13
- data/lib/karafka/pro/processing/piping/consumer.rb +13 -13
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/default.rb +36 -8
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +14 -10
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +3 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/ftr/default.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +4 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
- data/lib/karafka/pro/processing/virtual_partitions/distributors/balanced.rb +50 -0
- data/lib/karafka/pro/processing/virtual_partitions/distributors/base.rb +29 -0
- data/lib/karafka/pro/processing/virtual_partitions/distributors/consistent.rb +27 -0
- data/lib/karafka/pro/recurring_tasks/contracts/config.rb +8 -4
- data/lib/karafka/pro/recurring_tasks/dispatcher.rb +3 -3
- data/lib/karafka/pro/recurring_tasks/setup/config.rb +7 -2
- data/lib/karafka/pro/recurring_tasks.rb +13 -0
- data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +1 -1
- data/lib/karafka/pro/routing/features/multiplexing/config.rb +1 -0
- data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +17 -0
- data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +5 -2
- data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +8 -1
- data/lib/karafka/pro/routing/features/parallel_segments/builder.rb +47 -0
- data/lib/karafka/pro/routing/features/parallel_segments/config.rb +27 -0
- data/lib/karafka/pro/routing/features/parallel_segments/consumer_group.rb +83 -0
- data/lib/karafka/pro/routing/features/parallel_segments/contracts/consumer_group.rb +49 -0
- data/lib/karafka/pro/routing/features/parallel_segments/topic.rb +43 -0
- data/lib/karafka/pro/routing/features/parallel_segments.rb +24 -0
- data/lib/karafka/pro/routing/features/patterns/pattern.rb +1 -1
- data/lib/karafka/pro/routing/features/recurring_tasks/builder.rb +2 -2
- data/lib/karafka/pro/routing/features/scheduled_messages/builder.rb +10 -6
- data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +20 -2
- data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +8 -2
- data/lib/karafka/pro/scheduled_messages/consumer.rb +19 -21
- data/lib/karafka/pro/scheduled_messages/daily_buffer.rb +9 -6
- data/lib/karafka/pro/scheduled_messages/deserializers/headers.rb +7 -1
- data/lib/karafka/pro/scheduled_messages/max_epoch.rb +15 -6
- data/lib/karafka/pro/scheduled_messages.rb +13 -0
- data/lib/karafka/processing/coordinators_buffer.rb +1 -0
- data/lib/karafka/processing/strategies/default.rb +4 -4
- data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
- data/lib/karafka/routing/subscription_group.rb +1 -1
- data/lib/karafka/runner.rb +7 -1
- data/lib/karafka/server.rb +19 -19
- data/lib/karafka/setup/attributes_map.rb +2 -0
- data/lib/karafka/setup/config.rb +22 -1
- data/lib/karafka/setup/defaults_injector.rb +26 -1
- data/lib/karafka/status.rb +6 -1
- data/lib/karafka/swarm/node.rb +31 -0
- data/lib/karafka/swarm/supervisor.rb +4 -0
- data/lib/karafka/templates/karafka.rb.erb +14 -1
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +17 -9
- data/renovate.json +14 -2
- metadata +40 -40
- checksums.yaml.gz.sig +0 -0
- data/certs/cert.pem +0 -26
- data.tar.gz.sig +0 -0
- metadata.gz.sig +0 -0
data/lib/karafka/pro/routing/features/virtual_partitions/config.rb CHANGED
@@ -8,15 +8,33 @@ module Karafka
       module Routing
         module Features
           class VirtualPartitions < Base
-            #
+            # Configuration for virtual partitions feature
             Config = Struct.new(
               :active,
               :partitioner,
               :max_partitions,
               :offset_metadata_strategy,
               :reducer,
+              :distribution,
               keyword_init: true
-            )
+            ) do
+              # @return [Boolean] is this feature active
+              def active?
+                active
+              end
+
+              # @return [Object] distributor instance for the current distribution
+              def distributor
+                @distributor ||= case distribution
+                                 when :balanced
+                                   Processing::VirtualPartitions::Distributors::Balanced.new(self)
+                                 when :consistent
+                                   Processing::VirtualPartitions::Distributors::Consistent.new(self)
+                                 else
+                                   raise Karafka::Errors::UnsupportedCaseError, distribution
+                                 end
+              end
+            end
           end
         end
       end
data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb CHANGED
@@ -26,6 +26,7 @@ module Karafka
             required(:reducer) { |val| val.respond_to?(:call) }
             required(:max_partitions) { |val| val.is_a?(Integer) && val >= 1 }
             required(:offset_metadata_strategy) { |val| %i[exact current].include?(val) }
+            required(:distribution) { |val| %i[consistent balanced].include?(val) }
           end
 
           # When virtual partitions are defined, partitioner needs to respond to `#call` and it
data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb CHANGED
@@ -20,13 +20,18 @@ module Karafka
         #   the most recently reported metadata
         # @param reducer [nil, #call] reducer for VPs key. It allows for using a custom
         #   reducer to achieve enhanced parallelization when the default reducer is not enough.
+        # @param distribution [Symbol] the strategy to use for virtual partitioning. Can be
+        #   either `:consistent` or `:balanced`. The `:balanced` strategy ensures balanced
+        #   distribution of work across available workers while maintaining message order
+        #   within groups.
         # @return [VirtualPartitions] method that allows to set the virtual partitions details
         #   during the routing configuration and then allows to retrieve it
         def virtual_partitions(
           max_partitions: Karafka::App.config.concurrency,
           partitioner: nil,
           offset_metadata_strategy: :current,
-          reducer: nil
+          reducer: nil,
+          distribution: :consistent
         )
           @virtual_partitions ||= Config.new(
             active: !partitioner.nil?,
@@ -35,7 +40,8 @@ module Karafka
             offset_metadata_strategy: offset_metadata_strategy,
             # If no reducer provided, we use this one. It just runs a modulo on the sum of
            # a stringified version, providing fairly good distribution.
-            reducer: reducer || ->(virtual_key) { virtual_key.to_s.sum % max_partitions }
+            reducer: reducer || ->(virtual_key) { virtual_key.to_s.sum % max_partitions },
+            distribution: distribution
           )
         end
 
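For context, the new `distribution:` option is set per topic in the routing DSL. A minimal usage sketch (the topic, consumer and partitioner names are illustrative, not part of this diff):

    class KarafkaApp < Karafka::App
      routes.draw do
        topic :orders_states do
          consumer OrdersStatesConsumer

          # Up to five virtual partitions per real partition; the new :balanced strategy
          # packs key groups onto free workers instead of a plain hash-modulo split
          virtual_partitions(
            partitioner: ->(message) { message.headers['order_id'] },
            max_partitions: 5,
            distribution: :balanced
          )
        end
      end
    end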
data/lib/karafka/pro/scheduled_messages/consumer.rb CHANGED
@@ -8,6 +8,10 @@ module Karafka
     module ScheduledMessages
       # Consumer that coordinates scheduling of messages when the time comes
       class Consumer < ::Karafka::BaseConsumer
+        include Helpers::ConfigImporter.new(
+          dispatcher_class: %i[scheduled_messages dispatcher_class]
+        )
+
         # Prepares the initial state of all stateful components
         def initialized
           clear!
@@ -52,6 +56,9 @@ module Karafka
 
           # If end of the partition is reached, it always means all data is loaded
           @state.loaded!
+
+          tags.add(:state, @state.to_s)
+
           @states_reporter.call
         end
 
@@ -64,7 +71,6 @@ module Karafka
           return unless @state.loaded?
 
           keys = []
-          epochs = []
 
           # We first collect all the data for dispatch and then dispatch and **only** after
           # dispatch that is sync is successful we remove those messages from the daily buffer
@@ -72,16 +78,13 @@ module Karafka
           # with timeouts, etc, we need to be sure it wen through prior to deleting those messages
           # from the daily buffer. That way we ensure the at least once delivery and in case of
           # a transactional producer, exactly once delivery.
-          @daily_buffer.for_dispatch do |
-            epochs << epoch
+          @daily_buffer.for_dispatch do |message|
             keys << message.key
             @dispatcher << message
           end
 
           @dispatcher.flush
 
-          @max_epoch.update(epochs.max)
-
           keys.each { |key| @daily_buffer.delete(key) }
 
           @states_reporter.call
@@ -93,14 +96,6 @@ module Karafka
         # accumulator and time related per-message operations.
         # @param message [Karafka::Messages::Message]
         def process_message(message)
-          # If we started to receive messages younger than the moment we created the consumer for
-          # the given day, it means we have loaded all the history and we are no longer in the
-          # loading phase.
-          if message.timestamp.to_i > @today.created_at
-            @state.loaded!
-            tags.add(:state, @state.to_s)
-          end
-
           # If this is a schedule message we need to check if this is for today. Tombstone events
           # are always considered immediate as they indicate, that a message with a given key
           # was already dispatched or that user decided not to dispatch and cancelled the dispatch
@@ -120,6 +115,14 @@ module Karafka
             end
           end
 
+          # Tombstone events are only published after we have dispatched given message. This means
+          # that we've got that far in the dispatching time. This allows us (with a certain buffer)
+          # to quickly reject older messages (older in sense of being scheduled for previous times)
+          # instead of loading them into memory until they are expired
+          if message.headers['schedule_source_type'] == 'tombstone'
+            @max_epoch.update(message.headers['schedule_target_epoch'])
+          end
+
           # Add to buffer all tombstones and messages for the same day
           @daily_buffer << message
         end
@@ -129,7 +132,7 @@ module Karafka
           # If this is a new assignment we always need to seek from beginning to load the data
           if @state.fresh?
             clear!
-            seek(
+            seek(:earliest)
 
             return true
           end
@@ -140,7 +143,7 @@ module Karafka
           # If day has ended we reload and start new day with new schedules
           if @today.ended?
             clear!
-            seek(
+            seek(:earliest)
 
             return true
           end
@@ -156,7 +159,7 @@ module Karafka
           @today = Day.new
           @tracker = Tracker.new
           @state = State.new(false)
-          @dispatcher =
+          @dispatcher = dispatcher_class.new(topic.name, partition)
           @states_reporter = Helpers::IntervalRunner.new do
             @tracker.today = @daily_buffer.size
             @tracker.state = @state.to_s
@@ -166,11 +169,6 @@ module Karafka
 
           tags.add(:state, @state.to_s)
         end
-
-        # @return [Karafka::Core::Configurable::Node] Schedules config node
-        def config
-          @config ||= Karafka::App.config.scheduled_messages
-        end
       end
     end
   end
data/lib/karafka/pro/scheduled_messages/daily_buffer.rb CHANGED
@@ -45,19 +45,22 @@ module Karafka
 
       # Yields messages that should be dispatched (sent) to Kafka
       #
-      # @yieldparam [
-      #
-      #
-      # @note We yield epoch alongside of the message so we do not have to extract it several
-      #   times later on. This simplifies the API
+      # @yieldparam [Karafka::Messages::Message] messages to be dispatched sorted from the once
+      #   that are the oldest (lowest epoch)
       def for_dispatch
         dispatch = Time.now.to_i
 
+        selected = []
+
         @accu.each_value do |epoch, message|
           next unless epoch <= dispatch
 
-
+          selected << [epoch, message]
         end
+
+        selected
+          .sort_by!(&:first)
+          .each { |_, message| yield(message) }
       end
 
       # Removes given key from the accumulator
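The reworked `for_dispatch` now collects due `[epoch, message]` pairs and yields them oldest-first. A standalone sketch of that selection and ordering (plain Ruby, with a simplified accumulator standing in for the real buffer):

    # key => [epoch, message], as stored by the daily buffer
    accu = {
      'a' => [1_700_000_600, 'msg-a'],
      'b' => [1_700_000_100, 'msg-b'],
      'c' => [1_700_000_300, 'msg-c']
    }

    dispatch = 1_700_000_500
    selected = []

    accu.each_value do |epoch, message|
      next unless epoch <= dispatch

      selected << [epoch, message]
    end

    # Due messages come out oldest-first: msg-b, then msg-c
    selected.sort_by!(&:first).each { |_, message| puts message }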
data/lib/karafka/pro/scheduled_messages/deserializers/headers.rb CHANGED
@@ -10,6 +10,12 @@ module Karafka
       module Deserializers
         # Converts certain pieces of headers into their integer form for messages
         class Headers
+          # We only directly operate on epoch and other details for schedules and tombstones.
+          # cancel requests don't have to be deserialized that way since they don't have epoch
+          WORKABLE_TYPES = %w[schedule tombstone].freeze
+
+          private_constant :WORKABLE_TYPES
+
           # @param metadata [Karafka::aMessages::Metadata]
           # @return [Hash] headers
           def call(metadata)
@@ -19,7 +25,7 @@ module Karafka
 
             # tombstone and cancellation events are not operable, thus we do not have to cast any
             # of the headers pieces
-            return raw_headers unless type
+            return raw_headers unless WORKABLE_TYPES.include?(type)
 
             headers = raw_headers.dup
             headers['schedule_target_epoch'] = headers['schedule_target_epoch'].to_i
data/lib/karafka/pro/scheduled_messages/max_epoch.rb CHANGED
@@ -10,22 +10,31 @@ module Karafka
     # until which messages were dispatched by us. This allows us to quickly skip those messages
     # during recovery, because we do know, they were dispatched.
     class MaxEpoch
+      # We always give a bit of a buffer when using the max dispatch epoch because while we
+      # are dispatching messages, we could also later receive data for time close to our
+      # dispatch times. This is why when reloading days we give ourselves one hour of a window
+      # that we will keep until tombstones expire them. This prevents edge cases race-conditions
+      # when multiple scheduled events scheduled close to each other would bump epoch in such a
+      # way, that it would end up ignoring certain events.
+      GRACE_PERIOD = 60 * 60
+
+      private_constant :GRACE_PERIOD
+
+      # @return [Integer] max epoch recorded
+      attr_reader :to_i
+
       def initialize
         @max = -1
+        @to_i = @max
       end
 
       # Updates epoch if bigger than current max
       # @param new_max [Integer] potential new max epoch
       def update(new_max)
-        return unless new_max
         return unless new_max > @max
 
         @max = new_max
-
-
-      # @return [Integer] max epoch recorded
-      def to_i
-        @max
+        @to_i = @max - GRACE_PERIOD
       end
     end
   end
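A small worked example of the grace period semantics above (epoch values illustrative):

    max_epoch = Karafka::Pro::ScheduledMessages::MaxEpoch.new
    max_epoch.to_i # => -1, nothing dispatched yet

    # A tombstone reports a dispatch that happened at epoch 1_700_010_000
    max_epoch.update(1_700_010_000)

    # The exposed value is held back by the one hour GRACE_PERIOD (3600 seconds), so
    # schedules within the last hour before the max dispatch time are still loaded
    # instead of being skipped
    max_epoch.to_i # => 1_700_006_400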
data/lib/karafka/pro/scheduled_messages.rb CHANGED
@@ -53,6 +53,19 @@ module Karafka
       def post_setup(config)
         RecurringTasks::Contracts::Config.new.validate!(config.to_h)
       end
+
+      # Basically since we may have custom producers configured that are not the same as the
+      # default one, we hold a reference to old pre-fork producer. This means, that when we
+      # initialize it again in post-fork, as long as user uses defaults we should re-inherit
+      # it from the default config.
+      #
+      # @param config [Karafka::Core::Configurable::Node]
+      # @param pre_fork_producer [WaterDrop::Producer]
+      def post_fork(config, pre_fork_producer)
+        return unless config.scheduled_messages.producer == pre_fork_producer
+
+        config.scheduled_messages.producer = config.producer
+      end
     end
   end
 end
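A hypothetical setup sketch for the case this hook covers: a user-defined scheduled messages producer. The setting name follows the `config.scheduled_messages.producer` reference above and should be treated as an assumption rather than documented API. With such a custom producer, `post_fork` intentionally leaves it untouched in swarm forks:

    Karafka::App.setup do |config|
      config.scheduled_messages.producer = WaterDrop::Producer.new do |p_config|
        p_config.kafka = {
          'bootstrap.servers': '127.0.0.1:9092',
          'transactional.id': 'scheduled-messages-dispatcher'
        }
      end
    end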
data/lib/karafka/processing/coordinators_buffer.rb CHANGED
@@ -22,6 +22,7 @@ module Karafka
 
       # @param topic_name [String] topic name
       # @param partition [Integer] partition number
+      # @return [Karafka::Processing::Coordinator] found or created coordinator
       def find_or_create(topic_name, partition)
         @coordinators[topic_name][partition] ||= begin
           routing_topic = @topics.find(topic_name)
data/lib/karafka/processing/strategies/default.rb CHANGED
@@ -55,8 +55,8 @@ module Karafka
         # seek offset can be nil only in case `#seek` was invoked with offset reset request
         # In case like this we ignore marking
         return true if seek_offset.nil?
-        # Ignore
-        return true if seek_offset
+        # Ignore double markings of the same offset
+        return true if (seek_offset - 1) == message.offset
         return false if revoked?
         return revoked? unless client.mark_as_consumed(message)
 
@@ -74,8 +74,8 @@ module Karafka
         # seek offset can be nil only in case `#seek` was invoked with offset reset request
         # In case like this we ignore marking
         return true if seek_offset.nil?
-        # Ignore
-        return true if seek_offset
+        # Ignore double markings of the same offset
+        return true if (seek_offset - 1) == message.offset
         return false if revoked?
 
         return revoked? unless client.mark_as_consumed!(message)
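The new guard works because, after a successful mark, `seek_offset` already points at the next offset to consume. A rough illustration:

    # After marking the message at offset 41, seek_offset becomes 42. Re-marking the same
    # message satisfies (seek_offset - 1) == message.offset and short-circuits to true,
    # skipping another round trip to the client.
    seek_offset = 42
    message_offset = 41

    (seek_offset - 1) == message_offset # => true, duplicate marking is ignored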
data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb CHANGED
@@ -46,6 +46,7 @@ module Karafka
         # When topic is set to false, it means we just want to skip dispatch on DLQ
         next if topic == false
         next if topic.is_a?(String) && topic_regexp.match?(topic)
+        next if topic == :strategy
 
         [[%i[dead_letter_queue topic], :format]]
       end
data/lib/karafka/runner.rb CHANGED
@@ -4,6 +4,7 @@ module Karafka
   # Class used to run the Karafka listeners in separate threads
   class Runner
     include Helpers::ConfigImporter.new(
+      worker_thread_priority: %i[worker_thread_priority],
       manager: %i[internal connection manager],
      conductor: %i[internal connection conductor],
      jobs_queue_class: %i[internal processing jobs_queue_class]
@@ -26,7 +27,12 @@ module Karafka
       # Register all the listeners so they can be started and managed
       manager.register(listeners)
 
-      workers.each_with_index
+      workers.each_with_index do |worker, i|
+        worker.async_call(
+          "karafka.worker##{i}",
+          worker_thread_priority
+        )
+      end
 
       # We aggregate threads here for a supervised shutdown process
       Karafka::Server.workers = workers
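The priority passed to `async_call` comes from the new root `worker_thread_priority` setting (see the `setup/config.rb` changes below). A sketch of overriding it during setup:

    Karafka::App.setup do |config|
      config.client_id = 'my_app'
      # Restore the default Ruby thread priority if the -1 default (shorter CPU slice)
      # is not desired for a given deployment
      config.worker_thread_priority = 0
    end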
data/lib/karafka/server.rb CHANGED
@@ -9,6 +9,15 @@ module Karafka
 
     private_constant :FORCEFUL_SHUTDOWN_WAIT
 
+    extend Helpers::ConfigImporter.new(
+      cli_contract: %i[internal cli contract],
+      activity_manager: %i[internal routing activity_manager],
+      supervision_sleep: %i[internal supervision_sleep],
+      shutdown_timeout: %i[shutdown_timeout],
+      forceful_exit_code: %i[internal forceful_exit_code],
+      process: %i[internal process]
+    )
+
     class << self
       # Set of consuming threads. Each consumer thread contains a single consumer
       attr_accessor :listeners
@@ -30,6 +39,9 @@ module Karafka
       # as not everything is possible when operating in non-standalone mode, etc.
       attr_accessor :execution_mode
 
+      # id of the server. Useful for logging when we want to reference things issued by the server.
+      attr_accessor :id
+
       # Method which runs app
       def run
         self.listeners = []
@@ -39,9 +51,7 @@ module Karafka
         # embedded
         # We cannot validate this during the start because config needs to be populated and routes
         # need to be defined.
-
-          config.internal.routing.activity_manager.to_h
-        )
+        cli_contract.validate!(activity_manager.to_h)
 
         # We clear as we do not want parent handlers in case of working from fork
         process.clear
@@ -96,18 +106,18 @@ module Karafka
 
         Karafka::App.stop!
 
-        timeout =
+        timeout = shutdown_timeout
 
         # We check from time to time (for the timeout period) if all the threads finished
         # their work and if so, we can just return and normal shutdown process will take place
         # We divide it by 1000 because we use time in ms.
-        ((timeout / 1_000) * (1 /
+        ((timeout / 1_000) * (1 / supervision_sleep)).to_i.times do
           all_listeners_stopped = listeners.all?(&:stopped?)
           all_workers_stopped = workers.none?(&:alive?)
 
           return if all_listeners_stopped && all_workers_stopped
 
-          sleep(
+          sleep(supervision_sleep)
         end
 
         raise Errors::ForcefulShutdownError
@@ -145,7 +155,7 @@ module Karafka
         return unless process.supervised?
 
         # exit! is not within the instrumentation as it would not trigger due to exit
-        Kernel.exit!(
+        Kernel.exit!(forceful_exit_code)
       ensure
         # We need to check if it wasn't an early exit to make sure that only on stop invocation
         # can change the status after everything is closed
@@ -169,23 +179,13 @@ module Karafka
         # in one direction
         Karafka::App.quiet!
       end
-
-      private
-
-      # @return [Karafka::Core::Configurable::Node] root config node
-      def config
-        Karafka::App.config
-      end
-
-      # @return [Karafka::Process] process wrapper instance used to catch system signal calls
-      def process
-        config.internal.process
-      end
     end
 
     # Always start with standalone so there always is a value for the execution mode.
     # This is overwritten quickly during boot, but just in case someone would reach it prior to
     # booting, we want to have the default value.
     self.execution_mode = :standalone
+
+    self.id = SecureRandom.hex(6)
   end
 end
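The new `Karafka::Server.id` is a random hex reference assigned at load time. A minimal, hypothetical usage sketch in custom instrumentation:

    # Tag a log line with the server reference once the app reports it is running
    Karafka.monitor.subscribe('app.running') do |_event|
      Karafka.logger.info("Karafka server #{Karafka::Server.id} is running")
    end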
data/lib/karafka/setup/attributes_map.rb CHANGED
@@ -73,6 +73,7 @@ module Karafka
       message.max.bytes
       metadata.broker.list
       metadata.max.age.ms
+      metadata.recovery.strategy
       oauthbearer_token_refresh_cb
       offset.store.method
       offset.store.path
@@ -207,6 +208,7 @@ module Karafka
       message.timeout.ms
       metadata.broker.list
       metadata.max.age.ms
+      metadata.recovery.strategy
       msg_order_cmp
       oauthbearer_token_refresh_cb
       opaque
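This whitelists the librdkafka `metadata.recovery.strategy` property for both consumer and producer configs, so it can be passed through the `kafka` settings hash. A sketch of doing so (the `rebootstrap` value follows librdkafka's documentation and is an assumption here, not part of this diff):

    Karafka::App.setup do |config|
      config.kafka = {
        'bootstrap.servers': '127.0.0.1:9092',
        # Let the client re-bootstrap from the original servers when all brokers go stale
        'metadata.recovery.strategy': 'rebootstrap'
      }
    end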
data/lib/karafka/setup/config.rb CHANGED
@@ -73,6 +73,9 @@ module Karafka
     # Really useful when you want to ensure that all topics in routing are managed via
     # declaratives.
     setting :strict_declarative_topics, default: false
+    # Defaults to the CPU thread priority slice to -1 (50ms) to ensure that CPU intense
+    # processing does not affect other threads and prevents starvation
+    setting :worker_thread_priority, default: -1
 
     setting :oauth do
       # option [false, #call] Listener for using oauth bearer. This listener will be able to
@@ -133,6 +136,14 @@ module Karafka
       # How many times should be try. 1 000 ms x 60 => 60 seconds wait in total and then we give
       # up on pending operations
       setting :max_attempts, default: 60
+
+      # option poll_timeout [Integer] time in ms
+      # How long should a poll wait before yielding on no results (rdkafka-ruby setting)
+      # Lower value can be especially useful when working with Web UI, because it allows for
+      # increased responsiveness. Many admin operations do not take 100ms but they wait on poll
+      # until then prior to finishing, blocking the execution. Lowering to 25 ms can
+      # improve responsiveness of the Web UI. 50ms is a good trade-off for admin.
+      setting :poll_timeout, default: 50
     end
 
     # Namespace for internal settings that should not be modified directly
@@ -211,6 +222,10 @@ module Karafka
       # How long should we wait before a critical listener recovery
       # Too short may cause endless rebalance loops
       setting :reset_backoff, default: 60_000
+      # Similar to the `#worker_thread_priority`. Listener threads do not operate for long
+      # time and release GVL on polling but we provide this for API consistency and some
+      # special edge cases.
+      setting :listener_thread_priority, default: 0
 
       # Settings that are altered by our client proxy layer
       setting :proxy do
@@ -282,6 +297,9 @@ module Karafka
       setting :jobs_builder, default: Processing::JobsBuilder.new
       # option coordinator [Class] work coordinator we want to user for processing coordination
       setting :coordinator_class, default: Processing::Coordinator
+      # option errors_tracker_class [Class, nil] errors tracker that is used by the coordinator
+      # for granular error tracking. `nil` for OSS as it is not in use.
+      setting :errors_tracker_class, default: nil
       # option partitioner_class [Class] partitioner we use against a batch of data
       setting :partitioner_class, default: Processing::Partitioner
       # option strategy_selector [Object] processing strategy selector to be used
@@ -367,7 +385,10 @@ module Karafka
       config.producer ||= ::WaterDrop::Producer.new do |producer_config|
         # In some cases WaterDrop updates the config and we don't want our consumer config to
         # be polluted by those updates, that's why we copy
-
+        producer_kafka = AttributesMap.producer(config.kafka.dup)
+        # We inject some defaults (mostly for dev) unless user defined them
+        Setup::DefaultsInjector.producer(producer_kafka)
+        producer_config.kafka = producer_kafka
         # We also propagate same listener to the default producer to make sure, that the
         # listener for oauth is also automatically used by the producer. That way we don't
         # have to configure it manually for the default producer
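A sketch of tuning the new admin `poll_timeout` during setup (25 ms here mirrors the Web UI suggestion from the comment above):

    Karafka::App.setup do |config|
      # Poll more often in admin operations, trading a bit of CPU for snappier responses
      config.admin.poll_timeout = 25
    end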
data/lib/karafka/setup/defaults_injector.rb CHANGED
@@ -36,7 +36,17 @@ module Karafka
       'topic.metadata.refresh.interval.ms': 5_000
     }.freeze
 
-
+    # Contains settings that should not be used in production but make life easier in dev
+    # It is applied only to the default producer. If users setup their own producers, then
+    # they have to set this by themselves.
+    PRODUCER_KAFKA_DEV_DEFAULTS = {
+      # For all of those same reasoning as for the consumer
+      'allow.auto.create.topics': 'true',
+      'topic.metadata.refresh.interval.ms': 5_000
+    }.freeze
+
+    private_constant :CONSUMER_KAFKA_DEFAULTS, :CONSUMER_KAFKA_DEV_DEFAULTS,
+                     :PRODUCER_KAFKA_DEV_DEFAULTS
 
     class << self
       # Propagates the kafka setting defaults unless they are already present for consumer config
@@ -58,6 +68,21 @@ module Karafka
           kafka_config[key] = value
         end
       end
+
+      # Propagates the kafka settings defaults unless they are already present for producer
+      # config. This makes it easier to set some values that users usually don't change but still
+      # allows them to overwrite the whole hash.
+      #
+      # @param kafka_config [Hash] kafka scoped config
+      def producer(kafka_config)
+        return if Karafka::App.env.production?
+
+        PRODUCER_KAFKA_DEV_DEFAULTS.each do |key, value|
+          next if kafka_config.key?(key)
+
+          kafka_config[key] = value
+        end
+      end
     end
   end
 end
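A quick illustration of the producer defaults injection semantics (only missing keys are filled in, and nothing is touched when the app runs in production):

    kafka_config = {
      'bootstrap.servers': '127.0.0.1:9092',
      'topic.metadata.refresh.interval.ms': 10_000
    }

    # Outside production this adds 'allow.auto.create.topics' => 'true' but keeps the
    # user-provided 10s refresh interval instead of forcing the 5s dev default
    Karafka::Setup::DefaultsInjector.producer(kafka_config)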
data/lib/karafka/status.rb CHANGED
data/lib/karafka/swarm/node.rb CHANGED
@@ -27,6 +27,18 @@ module Karafka
       # @return [Integer] pid of the node
       attr_reader :pid
 
+      # When re-creating a producer in the fork, those are not attributes we want to inherit
+      # from the parent process because they are updated in the fork. If user wants to take those
+      # from the parent process, he should redefine them by overwriting the whole producer.
+      SKIPPABLE_NEW_PRODUCER_ATTRIBUTES = %i[
+        id
+        kafka
+        logger
+        oauth
+      ].freeze
+
+      private_constant :SKIPPABLE_NEW_PRODUCER_ATTRIBUTES
+
       # @param id [Integer] number of the fork. Used for uniqueness setup for group client ids and
       #   other stuff where we need to know a unique reference of the fork in regards to the rest
       #   of them.
@@ -52,15 +64,32 @@ module Karafka
         #   an attempt to close it when finalized, meaning it would be kept in memory.
         config.producer.close
 
+        old_producer = config.producer
+        old_producer_config = old_producer.config
+
         # Supervisor producer is closed, hence we need a new one here
         config.producer = ::WaterDrop::Producer.new do |p_config|
           p_config.kafka = Setup::AttributesMap.producer(kafka.dup)
           p_config.logger = config.logger
+
+          old_producer_config.to_h.each do |key, value|
+            next if SKIPPABLE_NEW_PRODUCER_ATTRIBUTES.include?(key)
+
+            p_config.public_send("#{key}=", value)
+          end
+
+          # Namespaced attributes need to be migrated directly on their config node
+          old_producer_config.oauth.to_h.each do |key, value|
+            p_config.oauth.public_send("#{key}=", value)
+          end
         end
 
         @pid = ::Process.pid
         @reader.close
 
+        # Certain features need to be reconfigured / reinitialized after fork in Pro
+        Pro::Loader.post_fork(config, old_producer) if Karafka.pro?
+
         # Indicate we are alive right after start
         healthy
 
@@ -69,6 +98,8 @@ module Karafka
         monitor.instrument('swarm.node.after_fork', caller: self)
 
         Karafka::Process.tags.add(:execution_mode, 'mode:swarm')
+        Karafka::Process.tags.add(:swarm_nodeid, "node:#{@id}")
+
         Server.execution_mode = :swarm
         Server.run
 
data/lib/karafka/swarm/supervisor.rb CHANGED
@@ -50,6 +50,10 @@ module Karafka
       # producer (should not be initialized but just in case)
       Karafka.producer.close
 
+      # Ensure rdkafka stuff is loaded into memory pre-fork. This will ensure, that we save
+      # few MB on forking as this will be already in memory.
+      Rdkafka::Bindings.rd_kafka_global_init
+
       Karafka::App.warmup
 
       manager.start