karafka 2.4.18 → 2.5.0.beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/CODEOWNERS +3 -0
- data/.github/workflows/ci.yml +58 -14
- data/.github/workflows/push.yml +36 -0
- data/.github/workflows/verify-action-pins.yml +16 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +60 -0
- data/Gemfile +2 -2
- data/Gemfile.lock +69 -50
- data/LICENSE-COMM +2 -2
- data/README.md +1 -1
- data/Rakefile +4 -0
- data/bin/clean_kafka +43 -0
- data/bin/integrations +19 -6
- data/bin/rspecs +15 -3
- data/bin/verify_kafka_warnings +35 -0
- data/bin/verify_topics_naming +27 -0
- data/config/locales/errors.yml +3 -0
- data/config/locales/pro_errors.yml +13 -2
- data/docker-compose.yml +1 -1
- data/examples/payloads/json/enrollment_event.json +579 -0
- data/examples/payloads/json/ingestion_event.json +30 -0
- data/examples/payloads/json/transaction_event.json +17 -0
- data/examples/payloads/json/user_event.json +11 -0
- data/karafka.gemspec +3 -8
- data/lib/karafka/active_job/current_attributes.rb +1 -1
- data/lib/karafka/admin/acl.rb +5 -1
- data/lib/karafka/admin/configs.rb +5 -1
- data/lib/karafka/admin.rb +69 -34
- data/lib/karafka/base_consumer.rb +17 -8
- data/lib/karafka/cli/base.rb +8 -2
- data/lib/karafka/cli/topics/align.rb +7 -4
- data/lib/karafka/cli/topics/base.rb +17 -0
- data/lib/karafka/cli/topics/create.rb +9 -7
- data/lib/karafka/cli/topics/delete.rb +4 -2
- data/lib/karafka/cli/topics/help.rb +39 -0
- data/lib/karafka/cli/topics/repartition.rb +4 -2
- data/lib/karafka/cli/topics.rb +10 -3
- data/lib/karafka/cli.rb +2 -0
- data/lib/karafka/connection/client.rb +30 -9
- data/lib/karafka/connection/listener.rb +24 -12
- data/lib/karafka/connection/messages_buffer.rb +1 -1
- data/lib/karafka/connection/proxy.rb +3 -0
- data/lib/karafka/constraints.rb +3 -3
- data/lib/karafka/contracts/config.rb +3 -0
- data/lib/karafka/contracts/topic.rb +1 -1
- data/lib/karafka/errors.rb +46 -2
- data/lib/karafka/helpers/async.rb +3 -1
- data/lib/karafka/instrumentation/callbacks/rebalance.rb +5 -1
- data/lib/karafka/instrumentation/logger_listener.rb +86 -23
- data/lib/karafka/instrumentation/proctitle_listener.rb +5 -1
- data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +2 -2
- data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
- data/lib/karafka/pro/cleaner.rb +8 -0
- data/lib/karafka/pro/cli/parallel_segments/base.rb +89 -0
- data/lib/karafka/pro/cli/parallel_segments/collapse.rb +164 -0
- data/lib/karafka/pro/cli/parallel_segments/distribute.rb +164 -0
- data/lib/karafka/pro/cli/parallel_segments.rb +60 -0
- data/lib/karafka/pro/connection/manager.rb +5 -8
- data/lib/karafka/pro/encryption.rb +8 -0
- data/lib/karafka/pro/instrumentation/performance_tracker.rb +1 -1
- data/lib/karafka/pro/iterator/expander.rb +5 -3
- data/lib/karafka/pro/iterator/tpl_builder.rb +23 -0
- data/lib/karafka/pro/loader.rb +10 -0
- data/lib/karafka/pro/processing/coordinator.rb +4 -1
- data/lib/karafka/pro/processing/coordinators/errors_tracker.rb +27 -3
- data/lib/karafka/pro/processing/coordinators/filters_applier.rb +11 -0
- data/lib/karafka/pro/processing/filters/base.rb +10 -2
- data/lib/karafka/pro/processing/filters/expirer.rb +5 -0
- data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb +2 -2
- data/lib/karafka/pro/processing/filters/virtual_limiter.rb +5 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/base.rb +73 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/default.rb +85 -0
- data/lib/karafka/pro/processing/parallel_segments/filters/mom.rb +66 -0
- data/lib/karafka/pro/processing/partitioner.rb +1 -13
- data/lib/karafka/pro/processing/piping/consumer.rb +13 -13
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/default.rb +36 -8
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +14 -10
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +3 -1
- data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/ftr/default.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +4 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
- data/lib/karafka/pro/processing/virtual_partitions/distributors/balanced.rb +50 -0
- data/lib/karafka/pro/processing/virtual_partitions/distributors/base.rb +29 -0
- data/lib/karafka/pro/processing/virtual_partitions/distributors/consistent.rb +27 -0
- data/lib/karafka/pro/recurring_tasks/contracts/config.rb +8 -4
- data/lib/karafka/pro/recurring_tasks/dispatcher.rb +3 -3
- data/lib/karafka/pro/recurring_tasks/setup/config.rb +7 -2
- data/lib/karafka/pro/recurring_tasks.rb +13 -0
- data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +1 -1
- data/lib/karafka/pro/routing/features/multiplexing/config.rb +1 -0
- data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +17 -0
- data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +5 -2
- data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +8 -1
- data/lib/karafka/pro/routing/features/parallel_segments/builder.rb +47 -0
- data/lib/karafka/pro/routing/features/parallel_segments/config.rb +27 -0
- data/lib/karafka/pro/routing/features/parallel_segments/consumer_group.rb +83 -0
- data/lib/karafka/pro/routing/features/parallel_segments/contracts/consumer_group.rb +49 -0
- data/lib/karafka/pro/routing/features/parallel_segments/topic.rb +43 -0
- data/lib/karafka/pro/routing/features/parallel_segments.rb +24 -0
- data/lib/karafka/pro/routing/features/patterns/pattern.rb +1 -1
- data/lib/karafka/pro/routing/features/recurring_tasks/builder.rb +2 -2
- data/lib/karafka/pro/routing/features/scheduled_messages/builder.rb +10 -6
- data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +20 -2
- data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +8 -2
- data/lib/karafka/pro/scheduled_messages/consumer.rb +19 -21
- data/lib/karafka/pro/scheduled_messages/daily_buffer.rb +9 -6
- data/lib/karafka/pro/scheduled_messages/deserializers/headers.rb +7 -1
- data/lib/karafka/pro/scheduled_messages/max_epoch.rb +15 -6
- data/lib/karafka/pro/scheduled_messages.rb +13 -0
- data/lib/karafka/processing/coordinators_buffer.rb +1 -0
- data/lib/karafka/processing/strategies/default.rb +4 -4
- data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
- data/lib/karafka/routing/subscription_group.rb +1 -1
- data/lib/karafka/runner.rb +7 -1
- data/lib/karafka/server.rb +19 -19
- data/lib/karafka/setup/attributes_map.rb +2 -0
- data/lib/karafka/setup/config.rb +22 -1
- data/lib/karafka/setup/defaults_injector.rb +26 -1
- data/lib/karafka/status.rb +6 -1
- data/lib/karafka/swarm/node.rb +31 -0
- data/lib/karafka/swarm/supervisor.rb +4 -0
- data/lib/karafka/templates/karafka.rb.erb +14 -1
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +17 -9
- data/renovate.json +14 -2
- metadata +40 -40
- checksums.yaml.gz.sig +0 -0
- data/certs/cert.pem +0 -26
- data.tar.gz.sig +0 -0
- metadata.gz.sig +0 -0
@@ -38,7 +38,7 @@ module Karafka
|
|
38
38
|
elsif !revoked? && !coordinator.manual_seek?
|
39
39
|
# If not revoked and not throttled, we move to where we were supposed to and
|
40
40
|
# resume
|
41
|
-
seek(last_group_message.offset + 1, false)
|
41
|
+
seek(last_group_message.offset + 1, false, reset_offset: false)
|
42
42
|
resume
|
43
43
|
else
|
44
44
|
resume
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
|
4
|
+
# See LICENSE for details.
|
5
|
+
|
6
|
+
module Karafka
|
7
|
+
module Pro
|
8
|
+
module Processing
|
9
|
+
module VirtualPartitions
|
10
|
+
module Distributors
|
11
|
+
# Balanced distributor that groups messages by partition key
|
12
|
+
# and processes larger groups first while maintaining message order within groups
|
13
|
+
class Balanced < Base
|
14
|
+
# @param messages [Array<Karafka::Messages::Message>] messages to distribute
|
15
|
+
# @return [Hash<Integer, Array<Karafka::Messages::Message>>] hash with group ids as
|
16
|
+
# keys and message groups as values
|
17
|
+
def call(messages)
|
18
|
+
# Group messages by partition key
|
19
|
+
key_groupings = messages.group_by { |msg| config.partitioner.call(msg) }
|
20
|
+
|
21
|
+
worker_loads = Array.new(config.max_partitions, 0)
|
22
|
+
worker_assignments = Array.new(config.max_partitions) { [] }
|
23
|
+
|
24
|
+
# Sort keys by workload in descending order
|
25
|
+
sorted_keys = key_groupings.keys.sort_by { |key| -key_groupings[key].size }
|
26
|
+
|
27
|
+
# Assign each key to the worker with the least current load
|
28
|
+
sorted_keys.each do |key|
|
29
|
+
# Find worker with minimum current load
|
30
|
+
min_load_worker = worker_loads.each_with_index.min_by { |load, _| load }[1]
|
31
|
+
messages = key_groupings[key]
|
32
|
+
|
33
|
+
# Assign this key to that worker
|
34
|
+
worker_assignments[min_load_worker] += messages
|
35
|
+
worker_loads[min_load_worker] += messages.size
|
36
|
+
end
|
37
|
+
|
38
|
+
# Combine messages for each worker and sort by offset
|
39
|
+
worker_assignments
|
40
|
+
.each_with_index
|
41
|
+
.reject { |group_messages, _| group_messages.empty? }
|
42
|
+
.map! { |group_messages, index| [index, group_messages.sort_by!(&:offset)] }
|
43
|
+
.to_h
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
|
4
|
+
# See LICENSE for details.
|
5
|
+
|
6
|
+
module Karafka
|
7
|
+
module Pro
|
8
|
+
module Processing
|
9
|
+
# Processing components for virtual partitions
|
10
|
+
module VirtualPartitions
|
11
|
+
# Distributors for virtual partitions
|
12
|
+
module Distributors
|
13
|
+
# Base class for all virtual partition distributors
|
14
|
+
class Base
|
15
|
+
# @param config [Karafka::Pro::Routing::Features::VirtualPartitions::Config]
|
16
|
+
def initialize(config)
|
17
|
+
@config = config
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
# @return [Karafka::Pro::Routing::Features::VirtualPartitions::Config]
|
23
|
+
attr_reader :config
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
|
4
|
+
# See LICENSE for details.
|
5
|
+
|
6
|
+
module Karafka
|
7
|
+
module Pro
|
8
|
+
module Processing
|
9
|
+
module VirtualPartitions
|
10
|
+
module Distributors
|
11
|
+
# Consistent distributor that ensures messages with the same partition key
|
12
|
+
# are always processed in the same virtual partition
|
13
|
+
class Consistent < Base
|
14
|
+
# @param messages [Array<Karafka::Messages::Message>] messages to distribute
|
15
|
+
# @return [Hash<Integer, Array<Karafka::Messages::Message>>] hash with group ids as
|
16
|
+
# keys and message groups as values
|
17
|
+
def call(messages)
|
18
|
+
messages
|
19
|
+
.group_by { |msg| config.reducer.call(config.partitioner.call(msg)) }
|
20
|
+
.to_h
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -29,12 +29,16 @@ module Karafka
|
|
29
29
|
end
|
30
30
|
|
31
31
|
nested(:topics) do
|
32
|
-
|
33
|
-
|
32
|
+
nested(:schedules) do
|
33
|
+
required(:name) do |val|
|
34
|
+
val.is_a?(String) && Karafka::Contracts::TOPIC_REGEXP.match?(val)
|
35
|
+
end
|
34
36
|
end
|
35
37
|
|
36
|
-
|
37
|
-
|
38
|
+
nested(:logs) do
|
39
|
+
required(:name) do |val|
|
40
|
+
val.is_a?(String) && Karafka::Contracts::TOPIC_REGEXP.match?(val)
|
41
|
+
end
|
38
42
|
end
|
39
43
|
end
|
40
44
|
end
|
@@ -12,7 +12,7 @@ module Karafka
|
|
12
12
|
# Snapshots to Kafka current schedule state
|
13
13
|
def schedule
|
14
14
|
produce(
|
15
|
-
topics.schedules,
|
15
|
+
topics.schedules.name,
|
16
16
|
'state:schedule',
|
17
17
|
serializer.schedule(::Karafka::Pro::RecurringTasks.schedule)
|
18
18
|
)
|
@@ -25,7 +25,7 @@ module Karafka
|
|
25
25
|
# because in the web ui we work with the full name and it is easier. Since
|
26
26
|
def command(name, task_id)
|
27
27
|
produce(
|
28
|
-
topics.schedules,
|
28
|
+
topics.schedules.name,
|
29
29
|
"command:#{name}:#{task_id}",
|
30
30
|
serializer.command(name, task_id)
|
31
31
|
)
|
@@ -35,7 +35,7 @@ module Karafka
|
|
35
35
|
# @param event [Karafka::Core::Monitoring::Event]
|
36
36
|
def log(event)
|
37
37
|
produce(
|
38
|
-
topics.logs,
|
38
|
+
topics.logs.name,
|
39
39
|
event[:task].id,
|
40
40
|
serializer.log(event)
|
41
41
|
)
|
@@ -32,8 +32,13 @@ module Karafka
|
|
32
32
|
)
|
33
33
|
|
34
34
|
setting(:topics) do
|
35
|
-
setting(:schedules
|
36
|
-
|
35
|
+
setting(:schedules) do
|
36
|
+
setting(:name, default: 'karafka_recurring_tasks_schedules')
|
37
|
+
end
|
38
|
+
|
39
|
+
setting(:logs) do
|
40
|
+
setting(:name, default: 'karafka_recurring_tasks_logs')
|
41
|
+
end
|
37
42
|
end
|
38
43
|
|
39
44
|
configure
|
@@ -73,6 +73,19 @@ module Karafka
|
|
73
73
|
|
74
74
|
Karafka.monitor.subscribe(Listener.new)
|
75
75
|
end
|
76
|
+
|
77
|
+
# Basically since we may have custom producers configured that are not the same as the
|
78
|
+
# default one, we hold a reference to the old pre-fork producer. This means that when we
|
79
|
+
# initialize it again in post-fork, as long as the user uses defaults, we should re-inherit
|
80
|
+
# it from the default config.
|
81
|
+
#
|
82
|
+
# @param config [Karafka::Core::Configurable::Node]
|
83
|
+
# @param pre_fork_producer [WaterDrop::Producer]
|
84
|
+
def post_fork(config, pre_fork_producer)
|
85
|
+
return unless config.recurring_tasks.producer == pre_fork_producer
|
86
|
+
|
87
|
+
config.recurring_tasks.producer = config.producer
|
88
|
+
end
|
76
89
|
end
|
77
90
|
end
|
78
91
|
end
|
@@ -12,7 +12,7 @@ module Karafka
|
|
12
12
|
module Topic
|
13
13
|
# @param strategy [#call, nil] Strategy we want to use or nil if a default strategy
|
14
14
|
# (same as in OSS) should be applied
|
15
|
-
# @param args [Hash]
|
15
|
+
# @param args [Hash] Pro DLQ arguments
|
16
16
|
def dead_letter_queue(strategy: nil, **args)
|
17
17
|
return @dead_letter_queue if @dead_letter_queue
|
18
18
|
|
@@ -28,6 +28,7 @@ module Karafka
|
|
28
28
|
optional(:multiplexing_min) { |val| val.is_a?(Integer) && val >= 1 }
|
29
29
|
optional(:multiplexing_max) { |val| val.is_a?(Integer) && val >= 1 }
|
30
30
|
optional(:multiplexing_boot) { |val| val.is_a?(Integer) && val >= 1 }
|
31
|
+
optional(:multiplexing_scale_delay) { |val| val.is_a?(Integer) && val >= 1_000 }
|
31
32
|
end
|
32
33
|
|
33
34
|
# Makes sure min is not more than max
|
@@ -78,6 +79,22 @@ module Karafka
|
|
78
79
|
[[%w[subscription_group_details], :multiplexing_boot_not_dynamic]]
|
79
80
|
end
|
80
81
|
|
82
|
+
# Makes sure we do not run multiplexing with min and max both set to 1, which does not make much sense
|
83
|
+
# because then it behaves like without multiplexing and can create problems for
|
84
|
+
# users running multiplexed subscription groups with multiple topics
|
85
|
+
virtual do |data, errors|
|
86
|
+
next unless errors.empty?
|
87
|
+
next unless min(data)
|
88
|
+
next unless max(data)
|
89
|
+
|
90
|
+
min = min(data)
|
91
|
+
max = max(data)
|
92
|
+
|
93
|
+
next unless min == 1 && max == 1
|
94
|
+
|
95
|
+
[[%w[subscription_group_details], :multiplexing_one_not_enough]]
|
96
|
+
end
|
97
|
+
|
81
98
|
class << self
|
82
99
|
# @param data [Hash] topic details
|
83
100
|
# @return [Integer, false] min or false if missing
|
@@ -14,12 +14,15 @@ module Karafka
|
|
14
14
|
# disabling dynamic multiplexing
|
15
15
|
# @param max [Integer] max multiplexing count
|
16
16
|
# @param boot [Integer] how many listeners should we start during boot by default
|
17
|
-
|
17
|
+
# @param scale_delay [Integer] number of ms of delay before applying any scale
|
18
|
+
# operation to a consumer group
|
19
|
+
def multiplexing(min: nil, max: 1, boot: nil, scale_delay: 60_000)
|
18
20
|
@target.current_subscription_group_details.merge!(
|
19
21
|
multiplexing_min: min || max,
|
20
22
|
multiplexing_max: max,
|
21
23
|
# Picks half of max by default as long as possible. Otherwise goes with min
|
22
|
-
multiplexing_boot: boot || [min || max, (max / 2)].max
|
24
|
+
multiplexing_boot: boot || [min || max, (max / 2)].max,
|
25
|
+
multiplexing_scale_delay: scale_delay
|
23
26
|
)
|
24
27
|
end
|
25
28
|
end
|
@@ -16,9 +16,16 @@ module Karafka
|
|
16
16
|
max = @details.fetch(:multiplexing_max, 1)
|
17
17
|
min = @details.fetch(:multiplexing_min, max)
|
18
18
|
boot = @details.fetch(:multiplexing_boot, max / 2)
|
19
|
+
scale_delay = @details.fetch(:multiplexing_scale_delay, 60_000)
|
19
20
|
active = max > 1
|
20
21
|
|
21
|
-
Config.new(
|
22
|
+
Config.new(
|
23
|
+
active: active,
|
24
|
+
min: min,
|
25
|
+
max: max,
|
26
|
+
boot: boot,
|
27
|
+
scale_delay: scale_delay
|
28
|
+
)
|
22
29
|
end
|
23
30
|
end
|
24
31
|
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
|
4
|
+
# See LICENSE for details.
|
5
|
+
|
6
|
+
module Karafka
|
7
|
+
module Pro
|
8
|
+
module Routing
|
9
|
+
module Features
|
10
|
+
class ParallelSegments < Base
|
11
|
+
# Expansions for the routing builder
|
12
|
+
module Builder
|
13
|
+
# Builds and saves given consumer group
|
14
|
+
# @param group_id [String, Symbol] name for consumer group
|
15
|
+
# @param block [Proc] proc that should be executed in the proxy context
|
16
|
+
def consumer_group(group_id, &block)
|
17
|
+
consumer_group = find { |cg| cg.name == group_id.to_s }
|
18
|
+
|
19
|
+
# Re-opening a CG should not change its parallel setup
|
20
|
+
if consumer_group
|
21
|
+
super
|
22
|
+
else
|
23
|
+
# We build a temp consumer group and a target to check if it has parallel segments
|
24
|
+
# enabled and if so, we do not add it to the routing but instead we build the
|
25
|
+
# appropriate number of parallel segment groups
|
26
|
+
temp_consumer_group = ::Karafka::Routing::ConsumerGroup.new(group_id.to_s)
|
27
|
+
temp_target = Karafka::Routing::Proxy.new(temp_consumer_group, &block).target
|
28
|
+
config = temp_target.parallel_segments
|
29
|
+
|
30
|
+
if config.active?
|
31
|
+
config.count.times do |i|
|
32
|
+
sub_name = [group_id, config.merge_key, i.to_s].join
|
33
|
+
sub_consumer_group = Karafka::Routing::ConsumerGroup.new(sub_name)
|
34
|
+
self << Karafka::Routing::Proxy.new(sub_consumer_group, &block).target
|
35
|
+
end
|
36
|
+
# If parallel segments are not active we go with the default flow
|
37
|
+
else
|
38
|
+
super
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
|
4
|
+
# See LICENSE for details.
|
5
|
+
|
6
|
+
module Karafka
|
7
|
+
module Pro
|
8
|
+
module Routing
|
9
|
+
module Features
|
10
|
+
class ParallelSegments < Base
|
11
|
+
# Config for parallel segments.
|
12
|
+
# @note Used on the consumer group level, not per topic
|
13
|
+
Config = Struct.new(
|
14
|
+
:active,
|
15
|
+
:count,
|
16
|
+
:partitioner,
|
17
|
+
:reducer,
|
18
|
+
:merge_key,
|
19
|
+
keyword_init: true
|
20
|
+
) do
|
21
|
+
alias_method :active?, :active
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
|
4
|
+
# See LICENSE for details.
|
5
|
+
|
6
|
+
module Karafka
|
7
|
+
module Pro
|
8
|
+
module Routing
|
9
|
+
module Features
|
10
|
+
class ParallelSegments < Base
|
11
|
+
# Parallel segments are defined on the consumer group (since it creates many), thus we
|
12
|
+
# define them on the consumer group.
|
13
|
+
# This module adds extra methods needed there to make it work
|
14
|
+
module ConsumerGroup
|
15
|
+
# @return [Config] parallel segments config
|
16
|
+
def parallel_segments
|
17
|
+
# We initialize it as disabled if not configured by the user
|
18
|
+
public_send(:parallel_segments=, count: 1)
|
19
|
+
end
|
20
|
+
|
21
|
+
# Allows setting parallel segments configuration
|
22
|
+
#
|
23
|
+
# @param count [Integer] number of parallel segments (number of parallel consumer
|
24
|
+
# groups that will be created)
|
25
|
+
# @param partitioner [nil, #call] nil or callable partitioner
|
26
|
+
# @param reducer [nil, #call] reducer for parallel key. It allows for using a custom
|
27
|
+
# reducer to achieve enhanced parallelization when the default reducer is not enough.
|
28
|
+
# @param merge_key [String] key used to build the parallel segment consumer groups
|
29
|
+
#
|
30
|
+
# @note This method is an assignor but the API is actually via the `#parallel_segments`
|
31
|
+
# method. Our `Routing::Proxy` normalizes that the way we want to have it exposed
|
32
|
+
# for the end users.
|
33
|
+
def parallel_segments=(
|
34
|
+
count: 1,
|
35
|
+
partitioner: nil,
|
36
|
+
reducer: nil,
|
37
|
+
merge_key: '-parallel-'
|
38
|
+
)
|
39
|
+
@parallel_segments ||= Config.new(
|
40
|
+
active: count > 1,
|
41
|
+
count: count,
|
42
|
+
partitioner: partitioner,
|
43
|
+
reducer: reducer || ->(parallel_key) { parallel_key.to_s.sum % count },
|
44
|
+
merge_key: merge_key
|
45
|
+
)
|
46
|
+
end
|
47
|
+
|
48
|
+
# @return [Boolean] are parallel segments active
|
49
|
+
def parallel_segments?
|
50
|
+
parallel_segments.active?
|
51
|
+
end
|
52
|
+
|
53
|
+
# @return [Integer] id of the segment (0 or bigger) or -1 if parallel segments are not
|
54
|
+
# active
|
55
|
+
def segment_id
|
56
|
+
return @segment_id if @segment_id
|
57
|
+
|
58
|
+
@segment_id = if parallel_segments?
|
59
|
+
name.split(parallel_segments.merge_key).last.to_i
|
60
|
+
else
|
61
|
+
-1
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
# @return [String] original segment consumer group name
|
66
|
+
def segment_origin
|
67
|
+
name.split(parallel_segments.merge_key).first
|
68
|
+
end
|
69
|
+
|
70
|
+
# @return [Hash] consumer group setup with the parallel segments definition in it
|
71
|
+
def to_h
|
72
|
+
super.merge(
|
73
|
+
parallel_segments: parallel_segments.to_h.merge(
|
74
|
+
segment_id: segment_id
|
75
|
+
)
|
76
|
+
).freeze
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
|
4
|
+
# See LICENSE for details.
|
5
|
+
|
6
|
+
module Karafka
|
7
|
+
module Pro
|
8
|
+
module Routing
|
9
|
+
module Features
|
10
|
+
class ParallelSegments < Base
|
11
|
+
# Namespace for parallel segments contracts
|
12
|
+
module Contracts
|
13
|
+
# Contract to validate configuration of the parallel segments feature
|
14
|
+
class ConsumerGroup < Karafka::Contracts::Base
|
15
|
+
configure do |config|
|
16
|
+
config.error_messages = YAML.safe_load(
|
17
|
+
File.read(
|
18
|
+
File.join(Karafka.gem_root, 'config', 'locales', 'pro_errors.yml')
|
19
|
+
)
|
20
|
+
).fetch('en').fetch('validations').fetch('consumer_group')
|
21
|
+
|
22
|
+
nested(:parallel_segments) do
|
23
|
+
required(:active) { |val| [true, false].include?(val) }
|
24
|
+
required(:partitioner) { |val| val.nil? || val.respond_to?(:call) }
|
25
|
+
required(:reducer) { |val| val.respond_to?(:call) }
|
26
|
+
required(:count) { |val| val.is_a?(Integer) && val >= 1 }
|
27
|
+
required(:merge_key) { |val| val.is_a?(String) && val.size >= 1 }
|
28
|
+
end
|
29
|
+
|
30
|
+
# When parallel segments are defined, partitioner needs to respond to `#call` and
|
31
|
+
# it cannot be nil
|
32
|
+
virtual do |data, errors|
|
33
|
+
next unless errors.empty?
|
34
|
+
|
35
|
+
parallel_segments = data[:parallel_segments]
|
36
|
+
|
37
|
+
next unless parallel_segments[:active]
|
38
|
+
next if parallel_segments[:partitioner].respond_to?(:call)
|
39
|
+
|
40
|
+
[[%i[parallel_segments partitioner], :respond_to_call]]
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
|
4
|
+
# See LICENSE for details.
|
5
|
+
|
6
|
+
module Karafka
|
7
|
+
module Pro
|
8
|
+
module Routing
|
9
|
+
module Features
|
10
|
+
class ParallelSegments < Base
|
11
|
+
# Parallel segments related expansions to the topic building flow
|
12
|
+
module Topic
|
13
|
+
# Injects the parallel segments filter as the first filter during building of each of
|
14
|
+
# the topics in case parallel segments are enabled.
|
15
|
+
#
|
16
|
+
# @param args [Object] anything accepted by the topic initializer
|
17
|
+
def initialize(*args)
|
18
|
+
super
|
19
|
+
|
20
|
+
return unless consumer_group.parallel_segments?
|
21
|
+
|
22
|
+
builder = lambda do |topic, _partition|
|
23
|
+
mom = topic.manual_offset_management?
|
24
|
+
|
25
|
+
# We have two filters for mom and non-mom scenario not to mix this logic
|
26
|
+
filter_scope = Karafka::Pro::Processing::ParallelSegments::Filters
|
27
|
+
filter_class = mom ? filter_scope::Mom : filter_scope::Default
|
28
|
+
|
29
|
+
filter_class.new(
|
30
|
+
segment_id: consumer_group.segment_id,
|
31
|
+
partitioner: consumer_group.parallel_segments.partitioner,
|
32
|
+
reducer: consumer_group.parallel_segments.reducer
|
33
|
+
)
|
34
|
+
end
|
35
|
+
|
36
|
+
filter(builder)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This code is part of Karafka Pro, a commercial component not licensed under LGPL.
|
4
|
+
# See LICENSE for details.
|
5
|
+
|
6
|
+
module Karafka
|
7
|
+
module Pro
|
8
|
+
module Routing
|
9
|
+
module Features
|
10
|
+
# Feature that allows parallelizing message processing within a single consumer group by
|
11
|
+
# creating multiple consumer group instances. It enables processing messages from each
|
12
|
+
# partition in parallel by distributing them to separate consumer group instances based on
|
13
|
+
# a partitioning key. Useful for both CPU and IO bound operations.
|
14
|
+
#
|
15
|
+
# Each parallel segment operates as an independent consumer group instance, processing
|
16
|
+
# messages that are assigned to it based on the configured partitioner and reducer.
|
17
|
+
# This allows for better resource utilization and increased processing throughput without
|
18
|
+
# requiring changes to the topic's partition count.
|
19
|
+
class ParallelSegments < Base
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -45,7 +45,7 @@ module Karafka
|
|
45
45
|
# topic but this minimizes simple mistakes
|
46
46
|
#
|
47
47
|
# This sub-part of the SHA256 digest should be unique enough and short enough to use it here
|
48
|
-
digest = Digest::
|
48
|
+
digest = Digest::SHA256.hexdigest(safe_regexp.source)[8..16]
|
49
49
|
@name = name ? name.to_s : "karafka-pattern-#{digest}"
|
50
50
|
@config = config
|
51
51
|
end
|
@@ -29,7 +29,7 @@ module Karafka
|
|
29
29
|
consumer_group tasks_cfg.group_id do
|
30
30
|
# Registers the primary topic that we use to control schedules execution. This is
|
31
31
|
# the one that we use to trigger recurring tasks.
|
32
|
-
schedules_topic = topic(topics_cfg.schedules) do
|
32
|
+
schedules_topic = topic(topics_cfg.schedules.name) do
|
33
33
|
consumer tasks_cfg.consumer_class
|
34
34
|
deserializer tasks_cfg.deserializer
|
35
35
|
# Because the topic method name as well as builder proxy method name is the same
|
@@ -83,7 +83,7 @@ module Karafka
|
|
83
83
|
|
84
84
|
# This topic is to store logs that we can then inspect either from the admin or via
|
85
85
|
# the Web UI
|
86
|
-
logs_topic = topic(topics_cfg.logs) do
|
86
|
+
logs_topic = topic(topics_cfg.logs.name) do
|
87
87
|
active(false)
|
88
88
|
deserializer tasks_cfg.deserializer
|
89
89
|
target.recurring_tasks(true)
|
@@ -12,14 +12,14 @@ module Karafka
|
|
12
12
|
module Builder
|
13
13
|
# Enables scheduled messages operations and adds needed topics and other stuff.
|
14
14
|
#
|
15
|
-
# @param
|
15
|
+
# @param topic_name [String, false] name for scheduled messages topic that is also used
|
16
16
|
# as a group identifier. Users can have multiple schedule topics flows to prevent key
|
17
17
|
# collisions, prioritize and do other stuff. `false` if not active.
|
18
18
|
# @param block [Proc] optional reconfiguration of the topics definitions.
|
19
19
|
# @note Namespace for topics should include the divider as it is not automatically
|
20
20
|
# added.
|
21
|
-
def scheduled_messages(
|
22
|
-
return unless
|
21
|
+
def scheduled_messages(topic_name = false, &block)
|
22
|
+
return unless topic_name
|
23
23
|
|
24
24
|
# Load zlib only if user enables scheduled messages
|
25
25
|
require 'zlib'
|
@@ -32,7 +32,7 @@ module Karafka
|
|
32
32
|
consumer_group msg_cfg.group_id do
|
33
33
|
# Registers the primary topic that we use to control schedules execution. This is
|
34
34
|
# the one that we use to trigger scheduled messages.
|
35
|
-
messages_topic = topic(
|
35
|
+
messages_topic = topic(topic_name) do
|
36
36
|
instance_eval(&block) if block && block.arity.zero?
|
37
37
|
|
38
38
|
consumer msg_cfg.consumer_class
|
@@ -54,7 +54,11 @@ module Karafka
|
|
54
54
|
consumer_persistence(true)
|
55
55
|
|
56
56
|
# This needs to be enabled for the eof to work correctly
|
57
|
-
kafka(
|
57
|
+
kafka(
|
58
|
+
'enable.partition.eof': true,
|
59
|
+
'auto.offset.reset': 'earliest',
|
60
|
+
inherit: true
|
61
|
+
)
|
58
62
|
eofed(true)
|
59
63
|
|
60
64
|
# Since this is a topic that gets replayed because of schedule management, we do
|
@@ -96,7 +100,7 @@ module Karafka
|
|
96
100
|
# Holds states of scheduler per each of the partitions since they tick
|
97
101
|
# independently. We only hold future statistics not to have to deal with
|
98
102
|
# any type of state restoration
|
99
|
-
states_topic = topic("#{
|
103
|
+
states_topic = topic("#{topic_name}#{msg_cfg.states_postfix}") do
|
100
104
|
active(false)
|
101
105
|
target.scheduled_messages(true)
|
102
106
|
config(
|