karafka 2.4.18 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/CODEOWNERS +3 -0
- data/.github/workflows/ci.yml +59 -15
- data/.github/workflows/push.yml +35 -0
- data/.github/workflows/verify-action-pins.yml +16 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +75 -0
- data/Gemfile +2 -2
- data/Gemfile.lock +72 -53
- data/LICENSE-COMM +2 -2
- data/README.md +1 -1
- data/Rakefile +4 -0
- data/bin/clean_kafka +43 -0
- data/bin/integrations +20 -6
- data/bin/rspecs +15 -3
- data/bin/verify_kafka_warnings +35 -0
- data/bin/verify_topics_naming +27 -0
- data/config/locales/errors.yml +5 -1
- data/config/locales/pro_errors.yml +13 -2
- data/docker-compose.yml +1 -1
- data/examples/payloads/avro/.gitkeep +0 -0
- data/examples/payloads/json/sample_set_01/enrollment_event.json +579 -0
- data/examples/payloads/json/sample_set_01/ingestion_event.json +30 -0
- data/examples/payloads/json/sample_set_01/transaction_event.json +17 -0
- data/examples/payloads/json/sample_set_01/user_event.json +11 -0
- data/karafka.gemspec +3 -8
- data/lib/karafka/active_job/current_attributes.rb +1 -1
- data/lib/karafka/active_job/job_extensions.rb +4 -1
- data/lib/karafka/admin/acl.rb +5 -1
- data/lib/karafka/admin/configs.rb +5 -1
- data/lib/karafka/admin.rb +89 -42
- data/lib/karafka/base_consumer.rb +17 -8
- data/lib/karafka/cli/base.rb +8 -2
- data/lib/karafka/cli/topics/align.rb +7 -4
- data/lib/karafka/cli/topics/base.rb +17 -0
- data/lib/karafka/cli/topics/create.rb +9 -7
- data/lib/karafka/cli/topics/delete.rb +4 -2
- data/lib/karafka/cli/topics/help.rb +39 -0
- data/lib/karafka/cli/topics/repartition.rb +4 -2
- data/lib/karafka/cli/topics.rb +10 -3
- data/lib/karafka/cli.rb +2 -0
- data/lib/karafka/connection/client.rb +39 -9
- data/lib/karafka/connection/listener.rb +24 -12
- data/lib/karafka/connection/messages_buffer.rb +1 -1
- data/lib/karafka/connection/proxy.rb +4 -1
- data/lib/karafka/constraints.rb +3 -3
- data/lib/karafka/contracts/base.rb +3 -2
- data/lib/karafka/contracts/config.rb +5 -1
- data/lib/karafka/contracts/topic.rb +1 -1
- data/lib/karafka/errors.rb +46 -2
- data/lib/karafka/helpers/async.rb +3 -1
- data/lib/karafka/helpers/interval_runner.rb +8 -0
- data/lib/karafka/instrumentation/callbacks/rebalance.rb +5 -1
- data/lib/karafka/instrumentation/logger_listener.rb +95 -32
- data/lib/karafka/instrumentation/proctitle_listener.rb +5 -1
- data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +2 -2
- data/lib/karafka/instrumentation/vendors/kubernetes/base_listener.rb +17 -2
- data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +29 -6
- data/lib/karafka/instrumentation/vendors/kubernetes/swarm_liveness_listener.rb +9 -0
- data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
- data/lib/karafka/pro/cleaner.rb +8 -0
- data/lib/karafka/pro/cli/parallel_segments/base.rb +89 -0
- data/lib/karafka/pro/cli/parallel_segments/collapse.rb +164 -0
- data/lib/karafka/pro/cli/parallel_segments/distribute.rb +164 -0
- data/lib/karafka/pro/cli/parallel_segments.rb +60 -0
- data/lib/karafka/pro/connection/manager.rb +5 -8
- data/lib/karafka/pro/encryption.rb +12 -1
- data/lib/karafka/pro/instrumentation/performance_tracker.rb +1 -1
- data/lib/karafka/pro/iterator/expander.rb +5 -3
- data/lib/karafka/pro/iterator/tpl_builder.rb +23 -0
- data/lib/karafka/pro/loader.rb +10 -0
- data/lib/karafka/pro/processing/coordinator.rb +4 -1
- data/lib/karafka/pro/processing/coordinators/errors_tracker.rb +32 -3
- data/lib/karafka/pro/processing/coordinators/filters_applier.rb +11 -0
- data/lib/karafka/pro/processing/filters/base.rb +10 -2
- data/lib/karafka/pro/processing/filters/expirer.rb +5 -0
- data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb +2 -2
- data/lib/karafka/pro/processing/filters/virtual_limiter.rb +5 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/base.rb +73 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/default.rb +85 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/mom.rb +66 -0
- data/lib/karafka/pro/processing/partitioner.rb +1 -13
- data/lib/karafka/pro/processing/piping/consumer.rb +13 -13
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/default.rb +36 -8
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +15 -10
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +3 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/ftr/default.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +4 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
- data/lib/karafka/pro/processing/virtual_partitions/distributors/balanced.rb +50 -0
- data/lib/karafka/pro/processing/virtual_partitions/distributors/base.rb +29 -0
- data/lib/karafka/pro/processing/virtual_partitions/distributors/consistent.rb +27 -0
- data/lib/karafka/pro/recurring_tasks/contracts/config.rb +8 -4
- data/lib/karafka/pro/recurring_tasks/dispatcher.rb +3 -3
- data/lib/karafka/pro/recurring_tasks/setup/config.rb +7 -2
- data/lib/karafka/pro/recurring_tasks.rb +21 -2
- data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +1 -1
- data/lib/karafka/pro/routing/features/multiplexing/config.rb +1 -0
- data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +17 -0
- data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +5 -2
- data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +8 -1
- data/lib/karafka/pro/routing/features/parallel_segments/builder.rb +47 -0
- data/lib/karafka/pro/routing/features/parallel_segments/config.rb +27 -0
- data/lib/karafka/pro/routing/features/parallel_segments/consumer_group.rb +83 -0
- data/lib/karafka/pro/routing/features/parallel_segments/contracts/consumer_group.rb +49 -0
- data/lib/karafka/pro/routing/features/parallel_segments/topic.rb +43 -0
- data/lib/karafka/pro/routing/features/parallel_segments.rb +24 -0
- data/lib/karafka/pro/routing/features/patterns/pattern.rb +1 -1
- data/lib/karafka/pro/routing/features/recurring_tasks/builder.rb +2 -2
- data/lib/karafka/pro/routing/features/scheduled_messages/builder.rb +10 -6
- data/lib/karafka/pro/routing/features/swarm/contracts/routing.rb +3 -2
- data/lib/karafka/pro/routing/features/swarm.rb +4 -1
- data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +20 -2
- data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +8 -2
- data/lib/karafka/pro/scheduled_messages/consumer.rb +61 -26
- data/lib/karafka/pro/scheduled_messages/daily_buffer.rb +9 -6
- data/lib/karafka/pro/scheduled_messages/deserializers/headers.rb +7 -1
- data/lib/karafka/pro/scheduled_messages/dispatcher.rb +2 -1
- data/lib/karafka/pro/scheduled_messages/max_epoch.rb +15 -6
- data/lib/karafka/pro/scheduled_messages/proxy.rb +15 -3
- data/lib/karafka/pro/scheduled_messages/serializer.rb +2 -4
- data/lib/karafka/pro/scheduled_messages/state.rb +20 -23
- data/lib/karafka/pro/scheduled_messages/tracker.rb +34 -8
- data/lib/karafka/pro/scheduled_messages.rb +17 -1
- data/lib/karafka/processing/coordinators_buffer.rb +1 -0
- data/lib/karafka/processing/strategies/default.rb +4 -4
- data/lib/karafka/routing/builder.rb +12 -3
- data/lib/karafka/routing/features/base/expander.rb +8 -2
- data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
- data/lib/karafka/routing/subscription_group.rb +1 -1
- data/lib/karafka/runner.rb +7 -1
- data/lib/karafka/server.rb +21 -18
- data/lib/karafka/setup/attributes_map.rb +2 -0
- data/lib/karafka/setup/config.rb +40 -7
- data/lib/karafka/setup/defaults_injector.rb +26 -1
- data/lib/karafka/status.rb +6 -1
- data/lib/karafka/swarm/node.rb +31 -0
- data/lib/karafka/swarm/supervisor.rb +9 -2
- data/lib/karafka/templates/karafka.rb.erb +14 -1
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +17 -9
- data/renovate.json +14 -2
- metadata +41 -40
- checksums.yaml.gz.sig +0 -0
- data/certs/cert.pem +0 -26
- data.tar.gz.sig +0 -0
- metadata.gz.sig +0 -0

data/lib/karafka/pro/routing/features/parallel_segments/config.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+# See LICENSE for details.
+
+module Karafka
+  module Pro
+    module Routing
+      module Features
+        class ParallelSegments < Base
+          # Config for parallel segments.
+          # @note Used on the consumer level, not per topic
+          Config = Struct.new(
+            :active,
+            :count,
+            :partitioner,
+            :reducer,
+            :merge_key,
+            keyword_init: true
+          ) do
+            alias_method :active?, :active
+          end
+        end
+      end
+    end
+  end
+end

data/lib/karafka/pro/routing/features/parallel_segments/consumer_group.rb
@@ -0,0 +1,83 @@
+# frozen_string_literal: true
+
+# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+# See LICENSE for details.
+
+module Karafka
+  module Pro
+    module Routing
+      module Features
+        class ParallelSegments < Base
+          # Parallel segments are defined on the consumer group (since it creates many), thus we
+          # define them on the consumer group.
+          # This module adds extra methods needed there to make it work
+          module ConsumerGroup
+            # @return [Config] parallel segments config
+            def parallel_segments
+              # We initialize it as disabled if not configured by the user
+              public_send(:parallel_segments=, count: 1)
+            end
+
+            # Allows setting parallel segments configuration
+            #
+            # @param count [Integer] number of parallel segments (number of parallel consumer
+            #   groups that will be created)
+            # @param partitioner [nil, #call] nil or callable partitioner
+            # @param reducer [nil, #call] reducer for parallel key. It allows for using a custom
+            #   reducer to achieve enhanced parallelization when the default reducer is not enough.
+            # @param merge_key [String] key used to build the parallel segment consumer groups
+            #
+            # @note This method is an assignor but the API is actually via the `#parallel_segments`
+            #   method. Our `Routing::Proxy` normalizes that the way we want to have it exposed
+            #   for the end users.
+            def parallel_segments=(
+              count: 1,
+              partitioner: nil,
+              reducer: nil,
+              merge_key: '-parallel-'
+            )
+              @parallel_segments ||= Config.new(
+                active: count > 1,
+                count: count,
+                partitioner: partitioner,
+                reducer: reducer || ->(parallel_key) { parallel_key.to_s.sum % count },
+                merge_key: merge_key
+              )
+            end
+
+            # @return [Boolean] are parallel segments active
+            def parallel_segments?
+              parallel_segments.active?
+            end
+
+            # @return [Integer] id of the segment (0 or bigger) or -1 if parallel segments are not
+            #   active
+            def segment_id
+              return @segment_id if @segment_id
+
+              @segment_id = if parallel_segments?
+                              name.split(parallel_segments.merge_key).last.to_i
+                            else
+                              -1
+                            end
+            end
+
+            # @return [String] original segment consumer group name
+            def segment_origin
+              name.split(parallel_segments.merge_key).first
+            end
+
+            # @return [Hash] consumer group setup with the parallel segments definition in it
+            def to_h
+              super.merge(
+                parallel_segments: parallel_segments.to_h.merge(
+                  segment_id: segment_id
+                )
+              ).freeze
+            end
+          end
+        end
+      end
+    end
+  end
+end
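
For orientation, the default reducer in the file above (`->(parallel_key) { parallel_key.to_s.sum % count }`) is what maps a parallel key to a segment when no custom reducer is supplied. A minimal standalone illustration of that arithmetic (not part of the diff):

```ruby
# Illustration only: mirrors the default reducer from consumer_group.rb above.
# String#sum adds the byte values of the key; the modulo picks the target segment.
count = 4
reducer = ->(parallel_key) { parallel_key.to_s.sum % count }

%w[user-1 user-2 user-42].each do |key|
  puts "#{key} => segment #{reducer.call(key)}"
end
```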

data/lib/karafka/pro/routing/features/parallel_segments/contracts/consumer_group.rb
@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+
+# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+# See LICENSE for details.
+
+module Karafka
+  module Pro
+    module Routing
+      module Features
+        class ParallelSegments < Base
+          # Namespace for parallel segments contracts
+          module Contracts
+            # Contract to validate configuration of the parallel segments feature
+            class ConsumerGroup < Karafka::Contracts::Base
+              configure do |config|
+                config.error_messages = YAML.safe_load(
+                  File.read(
+                    File.join(Karafka.gem_root, 'config', 'locales', 'pro_errors.yml')
+                  )
+                ).fetch('en').fetch('validations').fetch('consumer_group')
+
+                nested(:parallel_segments) do
+                  required(:active) { |val| [true, false].include?(val) }
+                  required(:partitioner) { |val| val.nil? || val.respond_to?(:call) }
+                  required(:reducer) { |val| val.respond_to?(:call) }
+                  required(:count) { |val| val.is_a?(Integer) && val >= 1 }
+                  required(:merge_key) { |val| val.is_a?(String) && val.size >= 1 }
+                end
+
+                # When parallel segments are defined, partitioner needs to respond to `#call` and
+                # it cannot be nil
+                virtual do |data, errors|
+                  next unless errors.empty?
+
+                  parallel_segments = data[:parallel_segments]
+
+                  next unless parallel_segments[:active]
+                  next if parallel_segments[:partitioner].respond_to?(:call)
+
+                  [[%i[parallel_segments partitioner], :respond_to_call]]
+                end
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end

data/lib/karafka/pro/routing/features/parallel_segments/topic.rb
@@ -0,0 +1,43 @@
+# frozen_string_literal: true
+
+# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+# See LICENSE for details.
+
+module Karafka
+  module Pro
+    module Routing
+      module Features
+        class ParallelSegments < Base
+          # Parallel segments related expansions to the topic building flow
+          module Topic
+            # Injects the parallel segments filter as the first filter during building of each of
+            # the topics in case parallel segments are enabled.
+            #
+            # @param args [Object] anything accepted by the topic initializer
+            def initialize(*args)
+              super
+
+              return unless consumer_group.parallel_segments?
+
+              builder = lambda do |topic, _partition|
+                mom = topic.manual_offset_management?
+
+                # We have two filters for mom and non-mom scenario not to mix this logic
+                filter_scope = Karafka::Pro::Processing::ParallelSegments::Filters
+                filter_class = mom ? filter_scope::Mom : filter_scope::Default
+
+                filter_class.new(
+                  segment_id: consumer_group.segment_id,
+                  partitioner: consumer_group.parallel_segments.partitioner,
+                  reducer: consumer_group.parallel_segments.reducer
+                )
+              end
+
+              filter(builder)
+            end
+          end
+        end
+      end
+    end
+  end
+end

data/lib/karafka/pro/routing/features/parallel_segments.rb
@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
+# See LICENSE for details.
+
+module Karafka
+  module Pro
+    module Routing
+      module Features
+        # Feature that allows parallelizing message processing within a single consumer group by
+        # creating multiple consumer group instances. It enables processing messages from each
+        # partition in parallel by distributing them to separate consumer group instances based on
+        # a partitioning key. Useful for both CPU and IO bound operations.
+        #
+        # Each parallel segment operates as an independent consumer group instance, processing
+        # messages that are assigned to it based on the configured partitioner and reducer.
+        # This allows for better resource utilization and increased processing throughput without
+        # requiring changes to the topic's partition count.
+        class ParallelSegments < Base
+        end
+      end
+    end
+  end
+end
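
The feature overview above describes the behavior; the actual configuration lives on the consumer group (see consumer_group.rb earlier in this diff). A hedged sketch of how that could look in an application's routing, with made-up group, topic and consumer names and the `parallel_segments` method assumed to be exposed through `Routing::Proxy` as its `@note` suggests:

```ruby
# Sketch under assumptions: names are hypothetical; the DSL exposure follows the
# @note in consumer_group.rb (proxy normalizes the assignor into #parallel_segments).
class KarafkaApp < Karafka::App
  routes.draw do
    consumer_group :orders do
      # count > 1 activates the feature; each segment runs as its own consumer
      # group instance derived from the original group name and the merge_key
      parallel_segments(
        count: 4,
        partitioner: ->(message) { message.key }
      )

      topic :orders_events do
        consumer OrdersEventsConsumer
      end
    end
  end
end
```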

data/lib/karafka/pro/routing/features/patterns/pattern.rb
@@ -45,7 +45,7 @@ module Karafka
             # topic but this minimizes simple mistakes
             #
             # This sub-part of sh1 should be unique enough and short-enough to use it here
-            digest = Digest::
+            digest = Digest::SHA256.hexdigest(safe_regexp.source)[8..16]
             @name = name ? name.to_s : "karafka-pattern-#{digest}"
             @config = config
           end

data/lib/karafka/pro/routing/features/recurring_tasks/builder.rb
@@ -29,7 +29,7 @@ module Karafka
           consumer_group tasks_cfg.group_id do
             # Registers the primary topic that we use to control schedules execution. This is
             # the one that we use to trigger recurring tasks.
-            schedules_topic = topic(topics_cfg.schedules) do
+            schedules_topic = topic(topics_cfg.schedules.name) do
               consumer tasks_cfg.consumer_class
               deserializer tasks_cfg.deserializer
               # Because the topic method name as well as builder proxy method name is the same
@@ -83,7 +83,7 @@ module Karafka
 
             # This topic is to store logs that we can then inspect either from the admin or via
             # the Web UI
-            logs_topic = topic(topics_cfg.logs) do
+            logs_topic = topic(topics_cfg.logs.name) do
               active(false)
               deserializer tasks_cfg.deserializer
               target.recurring_tasks(true)

data/lib/karafka/pro/routing/features/scheduled_messages/builder.rb
@@ -12,14 +12,14 @@ module Karafka
         module Builder
           # Enabled scheduled messages operations and adds needed topics and other stuff.
           #
-          # @param
+          # @param topic_name [String, false] name for scheduled messages topic that is also used
           #   as a group identifier. Users can have multiple schedule topics flows to prevent key
           #   collisions, prioritize and do other stuff. `false` if not active.
           # @param block [Proc] optional reconfiguration of the topics definitions.
           # @note Namespace for topics should include the divider as it is not automatically
           #   added.
-          def scheduled_messages(
-            return unless
+          def scheduled_messages(topic_name = false, &block)
+            return unless topic_name
 
             # Load zlib only if user enables scheduled messages
             require 'zlib'
@@ -32,7 +32,7 @@ module Karafka
           consumer_group msg_cfg.group_id do
             # Registers the primary topic that we use to control schedules execution. This is
             # the one that we use to trigger scheduled messages.
-            messages_topic = topic(
+            messages_topic = topic(topic_name) do
               instance_eval(&block) if block && block.arity.zero?
 
               consumer msg_cfg.consumer_class
@@ -54,7 +54,11 @@ module Karafka
               consumer_persistence(true)
 
               # This needs to be enabled for the eof to work correctly
-              kafka(
+              kafka(
+                'enable.partition.eof': true,
+                'auto.offset.reset': 'earliest',
+                inherit: true
+              )
               eofed(true)
 
               # Since this is a topic that gets replayed because of schedule management, we do
@@ -96,7 +100,7 @@ module Karafka
             # Holds states of scheduler per each of the partitions since they tick
             # independently. We only hold future statistics not to have to deal with
             # any type of state restoration
-            states_topic = topic("#{
+            states_topic = topic("#{topic_name}#{msg_cfg.states_postfix}") do
               active(false)
               target.scheduled_messages(true)
               config(
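
The builder changes above mean `scheduled_messages` now receives the topic name directly and derives the states topic from it via `states_postfix`. A hedged usage sketch (the topic name is an example value, not taken from the diff):

```ruby
# Sketch: enabling scheduled messages from the routing DSL; the builder above then
# registers both the schedules topic and its "<name><states_postfix>" states topic.
class KarafkaApp < Karafka::App
  routes.draw do
    scheduled_messages('scheduled_messages')
  end
end
```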

data/lib/karafka/pro/routing/features/swarm/contracts/routing.rb
@@ -28,7 +28,8 @@ module Karafka
             # Validates that each node has at least one assignment.
             #
             # @param builder [Karafka::Routing::Builder]
-
+            # @param scope [Array<String>]
+            def validate!(builder, scope: [])
               nodes_setup = Hash.new do |h, node_id|
                 h[node_id] = { active: false, node_id: node_id }
               end
@@ -49,7 +50,7 @@ module Karafka
               end
 
               nodes_setup.each_value do |details|
-                super(details)
+                super(details, scope: scope)
               end
             end
 

data/lib/karafka/pro/routing/features/swarm.rb
@@ -17,7 +17,10 @@ module Karafka
         # @param config [Karafka::Core::Configurable::Node] app config
         def post_setup(config)
           config.monitor.subscribe('app.before_warmup') do
-            Contracts::Routing.new.validate!(
+            Contracts::Routing.new.validate!(
+              config.internal.routing.builder,
+              scope: %w[swarm]
+            )
           end
         end
       end

data/lib/karafka/pro/routing/features/virtual_partitions/config.rb
@@ -8,15 +8,33 @@ module Karafka
     module Routing
       module Features
         class VirtualPartitions < Base
-          #
+          # Configuration for virtual partitions feature
           Config = Struct.new(
             :active,
             :partitioner,
             :max_partitions,
             :offset_metadata_strategy,
             :reducer,
+            :distribution,
             keyword_init: true
-          )
+          ) do
+            # @return [Boolean] is this feature active
+            def active?
+              active
+            end
+
+            # @return [Object] distributor instance for the current distribution
+            def distributor
+              @distributor ||= case distribution
+                               when :balanced
+                                 Processing::VirtualPartitions::Distributors::Balanced.new(self)
+                               when :consistent
+                                 Processing::VirtualPartitions::Distributors::Consistent.new(self)
+                               else
+                                 raise Karafka::Errors::UnsupportedCaseError, distribution
+                               end
+            end
+          end
         end
       end
     end

data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb
@@ -26,6 +26,7 @@ module Karafka
                 required(:reducer) { |val| val.respond_to?(:call) }
                 required(:max_partitions) { |val| val.is_a?(Integer) && val >= 1 }
                 required(:offset_metadata_strategy) { |val| %i[exact current].include?(val) }
+                required(:distribution) { |val| %i[consistent balanced].include?(val) }
               end
 
               # When virtual partitions are defined, partitioner needs to respond to `#call` and it

data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb
@@ -20,13 +20,18 @@ module Karafka
           #   the most recently reported metadata
           # @param reducer [nil, #call] reducer for VPs key. It allows for using a custom
           #   reducer to achieve enhanced parallelization when the default reducer is not enough.
+          # @param distribution [Symbol] the strategy to use for virtual partitioning. Can be
+          #   either `:consistent` or `:balanced`. The `:balanced` strategy ensures balanced
+          #   distribution of work across available workers while maintaining message order
+          #   within groups.
           # @return [VirtualPartitions] method that allows to set the virtual partitions details
           #   during the routing configuration and then allows to retrieve it
           def virtual_partitions(
             max_partitions: Karafka::App.config.concurrency,
             partitioner: nil,
             offset_metadata_strategy: :current,
-            reducer: nil
+            reducer: nil,
+            distribution: :consistent
           )
             @virtual_partitions ||= Config.new(
               active: !partitioner.nil?,
@@ -35,7 +40,8 @@ module Karafka
               offset_metadata_strategy: offset_metadata_strategy,
               # If no reducer provided, we use this one. It just runs a modulo on the sum of
               #   a stringified version, providing fairly good distribution.
-              reducer: reducer || ->(virtual_key) { virtual_key.to_s.sum % max_partitions }
+              reducer: reducer || ->(virtual_key) { virtual_key.to_s.sum % max_partitions },
+              distribution: distribution
             )
           end
 
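
The new `distribution` argument documented above defaults to `:consistent`, which preserves the previous behavior, while `:balanced` opts into the new balanced distributor. A hedged sketch of a topic route using it (topic, consumer and header names are examples, not from the diff):

```ruby
# Sketch: virtual partitions with the new balanced distribution strategy.
class KarafkaApp < Karafka::App
  routes.draw do
    topic :orders_events do
      consumer OrdersEventsConsumer

      virtual_partitions(
        partitioner: ->(message) { message.headers['order_id'] },
        max_partitions: 5,
        distribution: :balanced
      )
    end
  end
end
```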

data/lib/karafka/pro/scheduled_messages/consumer.rb
@@ -8,13 +8,27 @@ module Karafka
     module ScheduledMessages
       # Consumer that coordinates scheduling of messages when the time comes
       class Consumer < ::Karafka::BaseConsumer
+        include Helpers::ConfigImporter.new(
+          dispatcher_class: %i[scheduled_messages dispatcher_class]
+        )
+
+        # In case there is an extremely high turnover of messages, EOF may never kick in,
+        # effectively not changing status from loading to loaded. We use the time consumer instance
+        # was created + a buffer time to detect such a case (loading + messages from the time it
+        # was already running) to switch the state despite no EOF
+        # This is in seconds
+        GRACE_PERIOD = 15
+
+        private_constant :GRACE_PERIOD
+
         # Prepares the initial state of all stateful components
         def initialized
           clear!
           # Max epoch is always moving forward with the time. Never backwards, hence we do not
           # reset it at all.
           @max_epoch = MaxEpoch.new
-          @state = State.new
+          @state = State.new
+          @reloads = 0
         end
 
         # Processes messages and runs dispatch (via tick) if needed
@@ -23,11 +37,25 @@ module Karafka
 
           messages.each do |message|
             SchemaValidator.call(message)
+
+            # We always track offsets of messages, even if they would be later on skipped or
+            # ignored for any reason. That way we have debug info that is useful once in a while.
+            @tracker.offsets(message)
+
             process_message(message)
           end
 
           @states_reporter.call
 
+          recent_timestamp = messages.last.timestamp.to_i
+          post_started_timestamp = @tracker.started_at + GRACE_PERIOD
+
+          # If we started getting messages that are beyond the current time, it means we have
+          # loaded enough to start scheduling. The upcoming messages are from the future looking
+          # from perspective of the current consumer start. We add a bit of grace period not to
+          # deal with edge cases
+          loaded! if @state.loading? && recent_timestamp > post_started_timestamp
+
           eofed if eofed?
 
           # Unless given day data is fully loaded we should not dispatch any notifications nor
@@ -51,8 +79,7 @@ module Karafka
           return if reload!
 
           # If end of the partition is reached, it always means all data is loaded
-
-          @states_reporter.call
+          loaded!
         end
 
         # Performs periodic operations when no new data is provided to the topic partition
@@ -64,7 +91,6 @@ module Karafka
           return unless @state.loaded?
 
           keys = []
-          epochs = []
 
           # We first collect all the data for dispatch and then dispatch and **only** after
           # dispatch that is sync is successful we remove those messages from the daily buffer
@@ -72,35 +98,30 @@ module Karafka
           # with timeouts, etc, we need to be sure it wen through prior to deleting those messages
           # from the daily buffer. That way we ensure the at least once delivery and in case of
           # a transactional producer, exactly once delivery.
-          @daily_buffer.for_dispatch do |
-            epochs << epoch
+          @daily_buffer.for_dispatch do |message|
             keys << message.key
             @dispatcher << message
           end
 
           @dispatcher.flush
 
-          @max_epoch.update(epochs.max)
-
           keys.each { |key| @daily_buffer.delete(key) }
 
           @states_reporter.call
         end
 
+        # Move the state to shutdown and publish immediately
+        def shutdown
+          @state.stopped!
+          @states_reporter.call!
+        end
+
         private
 
         # Takes each message and adds it to the daily accumulator if needed or performs other
         # accumulator and time related per-message operations.
         # @param message [Karafka::Messages::Message]
        def process_message(message)
-          # If we started to receive messages younger than the moment we created the consumer for
-          # the given day, it means we have loaded all the history and we are no longer in the
-          # loading phase.
-          if message.timestamp.to_i > @today.created_at
-            @state.loaded!
-            tags.add(:state, @state.to_s)
-          end
-
           # If this is a schedule message we need to check if this is for today. Tombstone events
           # are always considered immediate as they indicate, that a message with a given key
           # was already dispatched or that user decided not to dispatch and cancelled the dispatch
@@ -109,7 +130,7 @@ module Karafka
           time = message.headers['schedule_target_epoch']
 
           # Do not track historical below today as those will be reflected in the daily buffer
-          @tracker.
+          @tracker.future(message) if time >= @today.starts_at
 
           if time > @today.ends_at || time < @max_epoch.to_i
             # Clean the message immediately when not needed (won't be scheduled) to preserve
@@ -120,6 +141,14 @@ module Karafka
             end
           end
 
+          # Tombstone events are only published after we have dispatched given message. This means
+          # that we've got that far in the dispatching time. This allows us (with a certain buffer)
+          # to quickly reject older messages (older in sense of being scheduled for previous times)
+          # instead of loading them into memory until they are expired
+          if message.headers['schedule_source_type'] == 'tombstone'
+            @max_epoch.update(message.headers['schedule_target_epoch'])
+          end
+
           # Add to buffer all tombstones and messages for the same day
           @daily_buffer << message
         end
@@ -129,7 +158,8 @@ module Karafka
           # If this is a new assignment we always need to seek from beginning to load the data
           if @state.fresh?
             clear!
-
+            @reloads += 1
+            seek(:earliest)
 
             return true
           end
@@ -140,7 +170,8 @@ module Karafka
           # If day has ended we reload and start new day with new schedules
           if @today.ended?
             clear!
-
+            @reloads += 1
+            seek(:earliest)
 
             return true
           end
@@ -148,6 +179,13 @@ module Karafka
           false
         end
 
+        # Moves the state to loaded and publishes the state update
+        def loaded!
+          @state.loaded!
+          tags.add(:state, @state.to_s)
+          @states_reporter.call!
+        end
+
         # Resets all buffers and states so we can start a new day with a clean slate
         # We can fully recreate the dispatcher because any undispatched messages will be dispatched
         # with the new day dispatcher after it is reloaded.
@@ -155,22 +193,19 @@ module Karafka
           @daily_buffer = DailyBuffer.new
           @today = Day.new
           @tracker = Tracker.new
-          @state = State.new
-          @
+          @state = State.new
+          @state.loading!
+          @dispatcher = dispatcher_class.new(topic.name, partition)
           @states_reporter = Helpers::IntervalRunner.new do
             @tracker.today = @daily_buffer.size
             @tracker.state = @state.to_s
+            @tracker.reloads = @reloads
 
             @dispatcher.state(@tracker)
           end
 
           tags.add(:state, @state.to_s)
         end
-
-        # @return [Karafka::Core::Configurable::Node] Schedules config node
-        def config
-          @config ||= Karafka::App.config.scheduled_messages
-        end
       end
     end
   end

data/lib/karafka/pro/scheduled_messages/daily_buffer.rb
@@ -45,19 +45,22 @@ module Karafka
 
       # Yields messages that should be dispatched (sent) to Kafka
       #
-      # @yieldparam [
-      #
-      #
-      # @note We yield epoch alongside of the message so we do not have to extract it several
-      #   times later on. This simplifies the API
+      # @yieldparam [Karafka::Messages::Message] messages to be dispatched sorted from the once
+      #   that are the oldest (lowest epoch)
       def for_dispatch
         dispatch = Time.now.to_i
 
+        selected = []
+
         @accu.each_value do |epoch, message|
           next unless epoch <= dispatch
 
-
+          selected << [epoch, message]
         end
+
+        selected
+          .sort_by!(&:first)
+          .each { |_, message| yield(message) }
       end
 
       # Removes given key from the accumulator

data/lib/karafka/pro/scheduled_messages/deserializers/headers.rb
@@ -10,6 +10,12 @@ module Karafka
       module Deserializers
         # Converts certain pieces of headers into their integer form for messages
         class Headers
+          # We only directly operate on epoch and other details for schedules and tombstones.
+          #   cancel requests don't have to be deserialized that way since they don't have epoch
+          WORKABLE_TYPES = %w[schedule tombstone].freeze
+
+          private_constant :WORKABLE_TYPES
+
           # @param metadata [Karafka::aMessages::Metadata]
           # @return [Hash] headers
           def call(metadata)
@@ -19,7 +25,7 @@ module Karafka
 
             # tombstone and cancellation events are not operable, thus we do not have to cast any
             #   of the headers pieces
-            return raw_headers unless type
+            return raw_headers unless WORKABLE_TYPES.include?(type)
 
             headers = raw_headers.dup
             headers['schedule_target_epoch'] = headers['schedule_target_epoch'].to_i