karafka 2.4.18 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/CODEOWNERS +3 -0
- data/.github/workflows/ci.yml +59 -15
- data/.github/workflows/push.yml +35 -0
- data/.github/workflows/verify-action-pins.yml +16 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +75 -0
- data/Gemfile +2 -2
- data/Gemfile.lock +72 -53
- data/LICENSE-COMM +2 -2
- data/README.md +1 -1
- data/Rakefile +4 -0
- data/bin/clean_kafka +43 -0
- data/bin/integrations +20 -6
- data/bin/rspecs +15 -3
- data/bin/verify_kafka_warnings +35 -0
- data/bin/verify_topics_naming +27 -0
- data/config/locales/errors.yml +5 -1
- data/config/locales/pro_errors.yml +13 -2
- data/docker-compose.yml +1 -1
- data/examples/payloads/avro/.gitkeep +0 -0
- data/examples/payloads/json/sample_set_01/enrollment_event.json +579 -0
- data/examples/payloads/json/sample_set_01/ingestion_event.json +30 -0
- data/examples/payloads/json/sample_set_01/transaction_event.json +17 -0
- data/examples/payloads/json/sample_set_01/user_event.json +11 -0
- data/karafka.gemspec +3 -8
- data/lib/karafka/active_job/current_attributes.rb +1 -1
- data/lib/karafka/active_job/job_extensions.rb +4 -1
- data/lib/karafka/admin/acl.rb +5 -1
- data/lib/karafka/admin/configs.rb +5 -1
- data/lib/karafka/admin.rb +89 -42
- data/lib/karafka/base_consumer.rb +17 -8
- data/lib/karafka/cli/base.rb +8 -2
- data/lib/karafka/cli/topics/align.rb +7 -4
- data/lib/karafka/cli/topics/base.rb +17 -0
- data/lib/karafka/cli/topics/create.rb +9 -7
- data/lib/karafka/cli/topics/delete.rb +4 -2
- data/lib/karafka/cli/topics/help.rb +39 -0
- data/lib/karafka/cli/topics/repartition.rb +4 -2
- data/lib/karafka/cli/topics.rb +10 -3
- data/lib/karafka/cli.rb +2 -0
- data/lib/karafka/connection/client.rb +39 -9
- data/lib/karafka/connection/listener.rb +24 -12
- data/lib/karafka/connection/messages_buffer.rb +1 -1
- data/lib/karafka/connection/proxy.rb +4 -1
- data/lib/karafka/constraints.rb +3 -3
- data/lib/karafka/contracts/base.rb +3 -2
- data/lib/karafka/contracts/config.rb +5 -1
- data/lib/karafka/contracts/topic.rb +1 -1
- data/lib/karafka/errors.rb +46 -2
- data/lib/karafka/helpers/async.rb +3 -1
- data/lib/karafka/helpers/interval_runner.rb +8 -0
- data/lib/karafka/instrumentation/callbacks/rebalance.rb +5 -1
- data/lib/karafka/instrumentation/logger_listener.rb +95 -32
- data/lib/karafka/instrumentation/proctitle_listener.rb +5 -1
- data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +2 -2
- data/lib/karafka/instrumentation/vendors/kubernetes/base_listener.rb +17 -2
- data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +29 -6
- data/lib/karafka/instrumentation/vendors/kubernetes/swarm_liveness_listener.rb +9 -0
- data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
- data/lib/karafka/pro/cleaner.rb +8 -0
- data/lib/karafka/pro/cli/parallel_segments/base.rb +89 -0
- data/lib/karafka/pro/cli/parallel_segments/collapse.rb +164 -0
- data/lib/karafka/pro/cli/parallel_segments/distribute.rb +164 -0
- data/lib/karafka/pro/cli/parallel_segments.rb +60 -0
- data/lib/karafka/pro/connection/manager.rb +5 -8
- data/lib/karafka/pro/encryption.rb +12 -1
- data/lib/karafka/pro/instrumentation/performance_tracker.rb +1 -1
- data/lib/karafka/pro/iterator/expander.rb +5 -3
- data/lib/karafka/pro/iterator/tpl_builder.rb +23 -0
- data/lib/karafka/pro/loader.rb +10 -0
- data/lib/karafka/pro/processing/coordinator.rb +4 -1
- data/lib/karafka/pro/processing/coordinators/errors_tracker.rb +32 -3
- data/lib/karafka/pro/processing/coordinators/filters_applier.rb +11 -0
- data/lib/karafka/pro/processing/filters/base.rb +10 -2
- data/lib/karafka/pro/processing/filters/expirer.rb +5 -0
- data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb +2 -2
- data/lib/karafka/pro/processing/filters/virtual_limiter.rb +5 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/base.rb +73 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/default.rb +85 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/mom.rb +66 -0
- data/lib/karafka/pro/processing/partitioner.rb +1 -13
- data/lib/karafka/pro/processing/piping/consumer.rb +13 -13
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/default.rb +36 -8
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +15 -10
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +3 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/ftr/default.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +4 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
- data/lib/karafka/pro/processing/virtual_partitions/distributors/balanced.rb +50 -0
- data/lib/karafka/pro/processing/virtual_partitions/distributors/base.rb +29 -0
- data/lib/karafka/pro/processing/virtual_partitions/distributors/consistent.rb +27 -0
- data/lib/karafka/pro/recurring_tasks/contracts/config.rb +8 -4
- data/lib/karafka/pro/recurring_tasks/dispatcher.rb +3 -3
- data/lib/karafka/pro/recurring_tasks/setup/config.rb +7 -2
- data/lib/karafka/pro/recurring_tasks.rb +21 -2
- data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +1 -1
- data/lib/karafka/pro/routing/features/multiplexing/config.rb +1 -0
- data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +17 -0
- data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +5 -2
- data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +8 -1
- data/lib/karafka/pro/routing/features/parallel_segments/builder.rb +47 -0
- data/lib/karafka/pro/routing/features/parallel_segments/config.rb +27 -0
- data/lib/karafka/pro/routing/features/parallel_segments/consumer_group.rb +83 -0
- data/lib/karafka/pro/routing/features/parallel_segments/contracts/consumer_group.rb +49 -0
- data/lib/karafka/pro/routing/features/parallel_segments/topic.rb +43 -0
- data/lib/karafka/pro/routing/features/parallel_segments.rb +24 -0
- data/lib/karafka/pro/routing/features/patterns/pattern.rb +1 -1
- data/lib/karafka/pro/routing/features/recurring_tasks/builder.rb +2 -2
- data/lib/karafka/pro/routing/features/scheduled_messages/builder.rb +10 -6
- data/lib/karafka/pro/routing/features/swarm/contracts/routing.rb +3 -2
- data/lib/karafka/pro/routing/features/swarm.rb +4 -1
- data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +20 -2
- data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +8 -2
- data/lib/karafka/pro/scheduled_messages/consumer.rb +61 -26
- data/lib/karafka/pro/scheduled_messages/daily_buffer.rb +9 -6
- data/lib/karafka/pro/scheduled_messages/deserializers/headers.rb +7 -1
- data/lib/karafka/pro/scheduled_messages/dispatcher.rb +2 -1
- data/lib/karafka/pro/scheduled_messages/max_epoch.rb +15 -6
- data/lib/karafka/pro/scheduled_messages/proxy.rb +15 -3
- data/lib/karafka/pro/scheduled_messages/serializer.rb +2 -4
- data/lib/karafka/pro/scheduled_messages/state.rb +20 -23
- data/lib/karafka/pro/scheduled_messages/tracker.rb +34 -8
- data/lib/karafka/pro/scheduled_messages.rb +17 -1
- data/lib/karafka/processing/coordinators_buffer.rb +1 -0
- data/lib/karafka/processing/strategies/default.rb +4 -4
- data/lib/karafka/routing/builder.rb +12 -3
- data/lib/karafka/routing/features/base/expander.rb +8 -2
- data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
- data/lib/karafka/routing/subscription_group.rb +1 -1
- data/lib/karafka/runner.rb +7 -1
- data/lib/karafka/server.rb +21 -18
- data/lib/karafka/setup/attributes_map.rb +2 -0
- data/lib/karafka/setup/config.rb +40 -7
- data/lib/karafka/setup/defaults_injector.rb +26 -1
- data/lib/karafka/status.rb +6 -1
- data/lib/karafka/swarm/node.rb +31 -0
- data/lib/karafka/swarm/supervisor.rb +9 -2
- data/lib/karafka/templates/karafka.rb.erb +14 -1
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +17 -9
- data/renovate.json +14 -2
- metadata +41 -40
- checksums.yaml.gz.sig +0 -0
- data/certs/cert.pem +0 -26
- data.tar.gz.sig +0 -0
- metadata.gz.sig +0 -0
@@ -70,7 +70,8 @@ module Karafka
       config.producer.produce_async(
         topic: "#{@topic}#{config.states_postfix}",
         payload: @serializer.state(tracker),
-        key
+        # We use the state as a key, so we always have one state transition data available
+        key: "#{tracker.state}_state",
         partition: @partition,
         headers: { 'zlib' => 'true' }
       )
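Note: keying each state record by the state name means that on a log-compacted states topic only the newest record per state survives. A minimal standalone sketch of that effect (a plain Hash stands in for Kafka compaction; this is an illustration, not Karafka code):

  log = {}

  %w[loading loaded loaded].each do |state|
    # compaction keeps the newest record per key, so each state keeps one latest entry
    log["#{state}_state"] = { state: state }
  end

  log.keys # => ["loading_state", "loaded_state"]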
@@ -10,22 +10,31 @@ module Karafka
     # until which messages were dispatched by us. This allows us to quickly skip those messages
     # during recovery, because we do know, they were dispatched.
     class MaxEpoch
+      # We always give a bit of a buffer when using the max dispatch epoch because while we
+      # are dispatching messages, we could also later receive data for time close to our
+      # dispatch times. This is why when reloading days we give ourselves one hour of a window
+      # that we will keep until tombstones expire them. This prevents edge cases race-conditions
+      # when multiple scheduled events scheduled close to each other would bump epoch in such a
+      # way, that it would end up ignoring certain events.
+      GRACE_PERIOD = 60 * 60
+
+      private_constant :GRACE_PERIOD
+
+      # @return [Integer] max epoch recorded
+      attr_reader :to_i
+
       def initialize
         @max = -1
+        @to_i = @max
       end

       # Updates epoch if bigger than current max
       # @param new_max [Integer] potential new max epoch
       def update(new_max)
-        return unless new_max
         return unless new_max > @max

         @max = new_max
-      end
-
-      # @return [Integer] max epoch recorded
-      def to_i
-        @max
+        @to_i = @max - GRACE_PERIOD
       end
     end
   end
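A standalone sketch of the grace-period idea from the MaxEpoch change above. The class is re-created here purely for illustration, mirroring the names in the diff:

  class MaxEpochSketch
    GRACE_PERIOD = 60 * 60 # one hour window

    attr_reader :to_i

    def initialize
      @max = -1
      @to_i = @max
    end

    def update(new_max)
      return unless new_max > @max

      @max = new_max
      # Expose a value one hour behind the real max so schedules that arrive for
      # times close to already dispatched ones are not treated as already handled
      @to_i = @max - GRACE_PERIOD
    end
  end

  epoch = MaxEpochSketch.new
  epoch.update(1_700_000_000)
  epoch.to_i # => 1_699_996_400 (one hour behind the recorded max)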
@@ -60,7 +60,11 @@ module Karafka
           # We need to ensure that the message we want to proxy is fully legit. Otherwise, since
           # we envelope details like target topic, we could end up having incorrect data to
           # schedule
-          MSG_CONTRACT.validate!(
+          MSG_CONTRACT.validate!(
+            message,
+            WaterDrop::Errors::MessageInvalidError,
+            scope: %w[scheduled_messages message]
+          )

           headers = (message[:headers] || {}).merge(
             'schedule_schema_version' => ScheduledMessages::SCHEMA_VERSION,

@@ -166,9 +170,17 @@ module Karafka
         # complies with our requirements
         # @param proxy_message [Hash] our message envelope
         def validate!(proxy_message)
-          POST_CONTRACT.validate!(
+          POST_CONTRACT.validate!(
+            proxy_message,
+            scope: %w[scheduled_messages message]
+          )
+
           # After proxy specific validations we also ensure, that the final form is correct
-          MSG_CONTRACT.validate!(
+          MSG_CONTRACT.validate!(
+            proxy_message,
+            WaterDrop::Errors::MessageInvalidError,
+            scope: %w[scheduled_messages message]
+          )
         end
       end
     end
@@ -16,10 +16,8 @@ module Karafka
         def state(tracker)
           data = {
             schema_version: ScheduledMessages::STATES_SCHEMA_VERSION,
-            dispatched_at: float_now
-
-            daily: tracker.daily
-          }
+            dispatched_at: float_now
+          }.merge(tracker.to_h)

           compress(
             serialize(data)
@@ -15,38 +15,35 @@ module Karafka
       # - loaded - state in which we finished loading all the schedules and we can dispatch
       #   messages when the time comes and we can process real-time incoming schedules and
       #   changes to schedules as they appear in the stream.
+      # - shutdown - the states are no longer available as the consumer has shut down
       class State
-        #
-
-
-
-
+        # Available states scheduling of messages may be in
+        STATES = %w[
+          fresh
+          loading
+          loaded
+          stopped
+        ].freeze

-
-        def fresh?
-          @loaded.nil?
-        end
+        private_constant :STATES

-
-
-          @loaded = true
+        def initialize
+          @state = 'fresh'
         end

-
-
-
+        STATES.each do |state|
+          define_method :"#{state}!" do
+            @state = state
+          end
+
+          define_method :"#{state}?" do
+            @state == state
+          end
         end

         # @return [String] current state string representation
         def to_s
-
-          when nil
-            'fresh'
-          when false
-            'loading'
-          when true
-            'loaded'
-          end
+          @state
         end
       end
     end
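A standalone sketch of the define_method pattern used above, which generates a bang (transition) and a predicate (query) method for every entry in the STATES list. The class is re-created for illustration only:

  class StateSketch
    STATES = %w[fresh loading loaded stopped].freeze

    def initialize
      @state = 'fresh'
    end

    STATES.each do |state|
      define_method(:"#{state}!") { @state = state } # transition, e.g. state.loaded!
      define_method(:"#{state}?") { @state == state } # query, e.g. state.loaded?
    end

    def to_s
      @state
    end
  end

  state = StateSketch.new
  state.fresh?   # => true
  state.loading!
  state.to_s     # => "loading"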
@@ -10,25 +10,40 @@ module Karafka
       #
       # It provides accurate today dispatch taken from daily buffer and estimates for future days
       class Tracker
-        # @return [Hash<String, Integer>]
-        attr_reader :daily
-
         # @return [String] current state
         attr_accessor :state

+        attr_writer :reloads
+
+        # @return [Integer] time epoch when this tracker was started
+        attr_reader :started_at
+
         def initialize
           @daily = Hash.new { |h, k| h[k] = 0 }
-          @
+          @started_at = Time.now.to_i
+          @offsets = { low: -1, high: -1 }
+          @state = 'fresh'
+          @reloads = 0
         end

-        #
+        # Tracks offsets of visited messages
+        #
+        # @param message [Karafka::Messages::Message]
+        def offsets(message)
+          message_offset = message.offset
+
+          @offsets[:low] = message_offset if @offsets[:low].negative?
+          @offsets[:high] = message.offset
+        end
+
+        # Accurate (because coming from daily buffer) number of things to schedule daily
         #
         # @param sum [Integer]
         def today=(sum)
-          @daily[epoch_to_date(@
+          @daily[epoch_to_date(@started_at)] = sum
         end

-        # Tracks message dispatch
+        # Tracks future message dispatch
         #
         # It is only relevant for future days as for today we use accurate metrics from the daily
         # buffer

@@ -37,12 +52,23 @@ module Karafka
         # tombstone message. Tombstone messages cancellations are not tracked because it would
        # drastically increase complexity. For given day we use the accurate counter and for
        # future days we use estimates.
-        def
+        def future(message)
          epoch = message.headers['schedule_target_epoch']

          @daily[epoch_to_date(epoch)] += 1
        end

+        # @return [Hash] hash with details that we want to expose
+        def to_h
+          {
+            state: @state,
+            offsets: @offsets,
+            daily: @daily,
+            started_at: @started_at,
+            reloads: @reloads
+          }.freeze
+        end
+
        private

        # @param epoch [Integer] epoch time
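A minimal standalone sketch of the low/high offset tracking the Tracker gains above: low is set once on the first visited message, high always advances to the latest. Message here is a stand-in Struct, not the Karafka message class:

  Message = Struct.new(:offset)

  offsets = { low: -1, high: -1 }

  [10, 11, 12].each do |off|
    message = Message.new(off)
    offsets[:low] = message.offset if offsets[:low].negative? # set once, on first message
    offsets[:high] = message.offset                           # always the most recent
  end

  offsets # => { low: 10, high: 12 }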
@@ -51,7 +51,23 @@ module Karafka

       # @param config [Karafka::Core::Configurable::Node] root node config
       def post_setup(config)
-
+        ScheduledMessages::Contracts::Config.new.validate!(
+          config.to_h,
+          scope: %w[config]
+        )
+      end
+
+      # Basically since we may have custom producers configured that are not the same as the
+      # default one, we hold a reference to old pre-fork producer. This means, that when we
+      # initialize it again in post-fork, as long as user uses defaults we should re-inherit
+      # it from the default config.
+      #
+      # @param config [Karafka::Core::Configurable::Node]
+      # @param pre_fork_producer [WaterDrop::Producer]
+      def post_fork(config, pre_fork_producer)
+        return unless config.scheduled_messages.producer == pre_fork_producer
+
+        config.scheduled_messages.producer = config.producer
       end
     end
   end
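A sketch of the post-fork producer swap logic above: only when the feature still points at the default (pre-fork) producer does it re-inherit the freshly built one, so custom producers are left alone. The config shape is simplified with OpenStruct; real nodes come from Karafka::Core::Configurable:

  require 'ostruct'

  config = OpenStruct.new(
    producer: :new_post_fork_producer,
    scheduled_messages: OpenStruct.new(producer: :pre_fork_producer)
  )
  pre_fork_producer = :pre_fork_producer

  if config.scheduled_messages.producer == pre_fork_producer
    config.scheduled_messages.producer = config.producer
  end

  config.scheduled_messages.producer # => :new_post_fork_producer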
@@ -22,6 +22,7 @@ module Karafka

       # @param topic_name [String] topic name
       # @param partition [Integer] partition number
+      # @return [Karafka::Processing::Coordinator] found or created coordinator
       def find_or_create(topic_name, partition)
         @coordinators[topic_name][partition] ||= begin
           routing_topic = @topics.find(topic_name)
@@ -55,8 +55,8 @@ module Karafka
         # seek offset can be nil only in case `#seek` was invoked with offset reset request
         # In case like this we ignore marking
         return true if seek_offset.nil?
-        # Ignore
-        return true if seek_offset
+        # Ignore double markings of the same offset
+        return true if (seek_offset - 1) == message.offset
         return false if revoked?
         return revoked? unless client.mark_as_consumed(message)

@@ -74,8 +74,8 @@ module Karafka
         # seek offset can be nil only in case `#seek` was invoked with offset reset request
         # In case like this we ignore marking
         return true if seek_offset.nil?
-        # Ignore
-        return true if seek_offset
+        # Ignore double markings of the same offset
+        return true if (seek_offset - 1) == message.offset
         return false if revoked?

         return revoked? unless client.mark_as_consumed!(message)
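Why `(seek_offset - 1) == message.offset` detects a double mark: after offset N is marked as consumed, the next expected (seek) offset becomes N + 1, so marking N again satisfies the guard and is skipped as a no-op. A trivial numeric illustration:

  seek_offset = 101    # offset 100 was already marked as consumed
  message_offset = 100 # the same message gets marked again

  (seek_offset - 1) == message_offset # => true, so the marking short-circuits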
@@ -50,15 +50,24 @@ module Karafka

         # Ensures high-level routing details consistency
         # Contains checks that require knowledge about all the consumer groups to operate
-        Contracts::Routing.new.validate!(
+        Contracts::Routing.new.validate!(
+          map(&:to_h),
+          scope: %w[routes]
+        )

         each do |consumer_group|
           # Validate consumer group settings
-          Contracts::ConsumerGroup.new.validate!(
+          Contracts::ConsumerGroup.new.validate!(
+            consumer_group.to_h,
+            scope: ['routes', consumer_group.name]
+          )

           # and then its topics settings
           consumer_group.topics.each do |topic|
-            Contracts::Topic.new.validate!(
+            Contracts::Topic.new.validate!(
+              topic.to_h,
+              scope: ['routes', consumer_group.name, topic.name]
+            )
           end

           # Initialize subscription groups after all the routing is done
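A hypothetical illustration of the scope values the calls above produce for a consumer group named "payments" with a "transactions" topic (the names are made up; only the scope-array shape comes from the diff):

  cg_name = 'payments'
  topic_name = 'transactions'

  [
    %w[routes],
    ['routes', cg_name],
    ['routes', cg_name, topic_name]
  ].map { |scope| scope.join(' > ') }
  # => ["routes", "routes > payments", "routes > payments > transactions"]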
@@ -38,13 +38,19 @@ module Karafka

         each do |consumer_group|
           if scope::Contracts.const_defined?('ConsumerGroup', false)
-            scope::Contracts::ConsumerGroup.new.validate!(
+            scope::Contracts::ConsumerGroup.new.validate!(
+              consumer_group.to_h,
+              scope: ['routes', consumer_group.name]
+            )
           end

           next unless scope::Contracts.const_defined?('Topic', false)

           consumer_group.topics.each do |topic|
-            scope::Contracts::Topic.new.validate!(
+            scope::Contracts::Topic.new.validate!(
+              topic.to_h,
+              scope: ['routes', consumer_group.name, topic.name]
+            )
           end
         end

@@ -46,6 +46,7 @@ module Karafka
           # When topic is set to false, it means we just want to skip dispatch on DLQ
           next if topic == false
           next if topic.is_a?(String) && topic_regexp.match?(topic)
+          next if topic == :strategy

           [[%i[dead_letter_queue topic], :format]]
         end
data/lib/karafka/runner.rb CHANGED
@@ -4,6 +4,7 @@ module Karafka
   # Class used to run the Karafka listeners in separate threads
   class Runner
     include Helpers::ConfigImporter.new(
+      worker_thread_priority: %i[worker_thread_priority],
       manager: %i[internal connection manager],
       conductor: %i[internal connection conductor],
       jobs_queue_class: %i[internal processing jobs_queue_class]
@@ -26,7 +27,12 @@ module Karafka
       # Register all the listeners so they can be started and managed
       manager.register(listeners)

-      workers.each_with_index
+      workers.each_with_index do |worker, i|
+        worker.async_call(
+          "karafka.worker##{i}",
+          worker_thread_priority
+        )
+      end

       # We aggregate threads here for a supervised shutdown process
       Karafka::Server.workers = workers
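A minimal sketch of what the new worker thread priority means at the Ruby level: on MRI, Thread#priority= is a scheduling hint, and a negative value deprioritizes CPU-heavy worker threads (shortening their scheduler quantum) relative to listener threads. The thread body here is a placeholder:

  worker = Thread.new do
    Thread.current.name = 'karafka.worker#0'
    # ... CPU intensive processing would happen here ...
  end
  worker.priority = -1 # matches the new worker_thread_priority default
  worker.join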
data/lib/karafka/server.rb CHANGED
@@ -9,6 +9,15 @@ module Karafka

     private_constant :FORCEFUL_SHUTDOWN_WAIT

+    extend Helpers::ConfigImporter.new(
+      cli_contract: %i[internal cli contract],
+      activity_manager: %i[internal routing activity_manager],
+      supervision_sleep: %i[internal supervision_sleep],
+      shutdown_timeout: %i[shutdown_timeout],
+      forceful_exit_code: %i[internal forceful_exit_code],
+      process: %i[internal process]
+    )
+
     class << self
       # Set of consuming threads. Each consumer thread contains a single consumer
       attr_accessor :listeners
@@ -30,6 +39,9 @@ module Karafka
       # as not everything is possible when operating in non-standalone mode, etc.
       attr_accessor :execution_mode

+      # id of the server. Useful for logging when we want to reference things issued by the server.
+      attr_accessor :id
+
       # Method which runs app
       def run
         self.listeners = []
@@ -39,8 +51,9 @@ module Karafka
       # embedded
       # We cannot validate this during the start because config needs to be populated and routes
       # need to be defined.
-
-
+        cli_contract.validate!(
+          activity_manager.to_h,
+          scope: %w[cli]
         )

         # We clear as we do not want parent handlers in case of working from fork
@@ -96,18 +109,18 @@ module Karafka

         Karafka::App.stop!

-        timeout =
+        timeout = shutdown_timeout

         # We check from time to time (for the timeout period) if all the threads finished
         # their work and if so, we can just return and normal shutdown process will take place
         # We divide it by 1000 because we use time in ms.
-        ((timeout / 1_000) * (1 /
+        ((timeout / 1_000) * (1 / supervision_sleep)).to_i.times do
           all_listeners_stopped = listeners.all?(&:stopped?)
           all_workers_stopped = workers.none?(&:alive?)

           return if all_listeners_stopped && all_workers_stopped

-          sleep(
+          sleep(supervision_sleep)
         end

         raise Errors::ForcefulShutdownError
@@ -145,7 +158,7 @@ module Karafka
         return unless process.supervised?

         # exit! is not within the instrumentation as it would not trigger due to exit
-        Kernel.exit!(
+        Kernel.exit!(forceful_exit_code)
       ensure
         # We need to check if it wasn't an early exit to make sure that only on stop invocation
         # can change the status after everything is closed
@@ -169,23 +182,13 @@ module Karafka
         # in one direction
         Karafka::App.quiet!
       end
-
-      private
-
-      # @return [Karafka::Core::Configurable::Node] root config node
-      def config
-        Karafka::App.config
-      end
-
-      # @return [Karafka::Process] process wrapper instance used to catch system signal calls
-      def process
-        config.internal.process
-      end
     end

     # Always start with standalone so there always is a value for the execution mode.
     # This is overwritten quickly during boot, but just in case someone would reach it prior to
     # booting, we want to have the default value.
     self.execution_mode = :standalone
+
+    self.id = SecureRandom.hex(6)
   end
 end

@@ -73,6 +73,7 @@ module Karafka
           message.max.bytes
           metadata.broker.list
           metadata.max.age.ms
+          metadata.recovery.strategy
           oauthbearer_token_refresh_cb
           offset.store.method
           offset.store.path
@@ -207,6 +208,7 @@ module Karafka
           message.timeout.ms
           metadata.broker.list
           metadata.max.age.ms
+          metadata.recovery.strategy
           msg_order_cmp
           oauthbearer_token_refresh_cb
           opaque
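The arithmetic in the server.rb shutdown hunk above: shutdown_timeout is in milliseconds while supervision_sleep is in seconds, so the loop runs one liveness check per sleep interval for the whole timeout window. A worked example (the supervision_sleep value is assumed for illustration):

  shutdown_timeout = 60_000 # ms
  supervision_sleep = 0.1   # seconds, hypothetical value

  iterations = ((shutdown_timeout / 1_000) * (1 / supervision_sleep)).to_i
  iterations # => 600 checks, one every 100ms, for up to 60 seconds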
data/lib/karafka/setup/config.rb CHANGED
@@ -73,6 +73,9 @@ module Karafka
     # Really useful when you want to ensure that all topics in routing are managed via
     # declaratives.
     setting :strict_declarative_topics, default: false
+    # Defaults to the CPU thread priority slice to -1 (50ms) to ensure that CPU intense
+    # processing does not affect other threads and prevents starvation
+    setting :worker_thread_priority, default: -1

     setting :oauth do
       # option [false, #call] Listener for using oauth bearer. This listener will be able to
@@ -128,11 +131,28 @@ module Karafka
       # option max_wait_time [Integer] We wait only for this amount of time before raising error
       # as we intercept this error and retry after checking that the operation was finished or
       # failed using external factor.
-
-
-      #
-      #
-      setting :
+      #
+      # For async this will finish immediately but for sync operations this will wait and we
+      # will get a confirmation. 60 seconds is ok for both cases as for async, the re-wait will
+      # kick in
+      setting :max_wait_time, default: 60 * 1_000
+
+      # How long should we wait on admin operation retrying before giving up and raising an
+      # error that result is not visible
+      setting :max_retries_duration, default: 60_000
+
+      # In case of fast-finished async work, this `retry_backoff` help us not re-query Kafka
+      # too fast after previous call to check the async operation results. Basically prevents
+      # us from spamming metadata requests to Kafka in a loop
+      setting :retry_backoff, default: 500
+
+      # option poll_timeout [Integer] time in ms
+      # How long should a poll wait before yielding on no results (rdkafka-ruby setting)
+      # Lower value can be especially useful when working with Web UI, because it allows for
+      # increased responsiveness. Many admin operations do not take 100ms but they wait on poll
+      # until then prior to finishing, blocking the execution. Lowering to 25 ms can
+      # improve responsiveness of the Web UI. 50ms is a good trade-off for admin.
+      setting :poll_timeout, default: 50
     end

     # Namespace for internal settings that should not be modified directly
@@ -211,6 +231,10 @@ module Karafka
       # How long should we wait before a critical listener recovery
       # Too short may cause endless rebalance loops
       setting :reset_backoff, default: 60_000
+      # Similar to the `#worker_thread_priority`. Listener threads do not operate for long
+      # time and release GVL on polling but we provide this for API consistency and some
+      # special edge cases.
+      setting :listener_thread_priority, default: 0

       # Settings that are altered by our client proxy layer
       setting :proxy do
@@ -282,6 +306,9 @@ module Karafka
       setting :jobs_builder, default: Processing::JobsBuilder.new
       # option coordinator [Class] work coordinator we want to user for processing coordination
       setting :coordinator_class, default: Processing::Coordinator
+      # option errors_tracker_class [Class, nil] errors tracker that is used by the coordinator
+      # for granular error tracking. `nil` for OSS as it is not in use.
+      setting :errors_tracker_class, default: nil
       # option partitioner_class [Class] partitioner we use against a batch of data
       setting :partitioner_class, default: Processing::Partitioner
       # option strategy_selector [Object] processing strategy selector to be used
@@ -334,7 +361,10 @@ module Karafka

       configure(&block)

-      Contracts::Config.new.validate!(
+      Contracts::Config.new.validate!(
+        config.to_h,
+        scope: %w[config]
+      )

       configure_components

@@ -367,7 +397,10 @@ module Karafka
       config.producer ||= ::WaterDrop::Producer.new do |producer_config|
         # In some cases WaterDrop updates the config and we don't want our consumer config to
         # be polluted by those updates, that's why we copy
-
+        producer_kafka = AttributesMap.producer(config.kafka.dup)
+        # We inject some defaults (mostly for dev) unless user defined them
+        Setup::DefaultsInjector.producer(producer_kafka)
+        producer_config.kafka = producer_kafka
         # We also propagate same listener to the default producer to make sure, that the
         # listener for oauth is also automatically used by the producer. That way we don't
         # have to configure it manually for the default producer
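A hedged usage sketch for the new admin-operation settings above, assuming they live under the admin settings node (which the surrounding context in this hunk suggests) and using the standard Karafka setup block; values are examples only:

  class KarafkaApp < Karafka::App
    setup do |config|
      config.admin.max_wait_time = 60 * 1_000 # ms to wait on a sync admin operation
      config.admin.retry_backoff = 500        # ms between async result re-checks
      config.admin.poll_timeout = 25          # ms, lower for a snappier Web UI
    end
  end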
@@ -36,7 +36,17 @@ module Karafka
       'topic.metadata.refresh.interval.ms': 5_000
     }.freeze

-
+    # Contains settings that should not be used in production but make life easier in dev
+    # It is applied only to the default producer. If users setup their own producers, then
+    # they have to set this by themselves.
+    PRODUCER_KAFKA_DEV_DEFAULTS = {
+      # For all of those same reasoning as for the consumer
+      'allow.auto.create.topics': 'true',
+      'topic.metadata.refresh.interval.ms': 5_000
+    }.freeze
+
+    private_constant :CONSUMER_KAFKA_DEFAULTS, :CONSUMER_KAFKA_DEV_DEFAULTS,
+                     :PRODUCER_KAFKA_DEV_DEFAULTS

     class << self
       # Propagates the kafka setting defaults unless they are already present for consumer config
@@ -58,6 +68,21 @@ module Karafka
           kafka_config[key] = value
         end
       end
+
+      # Propagates the kafka settings defaults unless they are already present for producer
+      # config. This makes it easier to set some values that users usually don't change but still
+      # allows them to overwrite the whole hash.
+      #
+      # @param kafka_config [Hash] kafka scoped config
+      def producer(kafka_config)
+        return if Karafka::App.env.production?
+
+        PRODUCER_KAFKA_DEV_DEFAULTS.each do |key, value|
+          next if kafka_config.key?(key)
+
+          kafka_config[key] = value
+        end
+      end
     end
   end
 end
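A standalone sketch of the injector's non-overwriting behavior: user-provided keys always win, and only missing keys receive the dev defaults (the defaults hash mirrors the constant in the diff above):

  kafka_config = { 'allow.auto.create.topics': 'false' } # explicit user choice

  defaults = {
    'allow.auto.create.topics': 'true',
    'topic.metadata.refresh.interval.ms': 5_000
  }

  defaults.each do |key, value|
    next if kafka_config.key?(key) # respect what the user already set

    kafka_config[key] = value
  end

  kafka_config
  # => user's 'false' is kept; only the refresh interval default is injected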
data/lib/karafka/status.rb
CHANGED