karafka 2.2.14 → 2.3.0.alpha2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +38 -12
- data/.ruby-version +1 -1
- data/CHANGELOG.md +24 -0
- data/Gemfile.lock +16 -16
- data/README.md +0 -2
- data/SECURITY.md +23 -0
- data/bin/integrations +1 -1
- data/config/locales/errors.yml +7 -1
- data/config/locales/pro_errors.yml +22 -0
- data/docker-compose.yml +1 -1
- data/karafka.gemspec +2 -2
- data/lib/karafka/admin/acl.rb +287 -0
- data/lib/karafka/admin.rb +9 -13
- data/lib/karafka/app.rb +5 -3
- data/lib/karafka/base_consumer.rb +9 -1
- data/lib/karafka/cli/base.rb +1 -1
- data/lib/karafka/connection/client.rb +83 -76
- data/lib/karafka/connection/conductor.rb +28 -0
- data/lib/karafka/connection/listener.rb +159 -42
- data/lib/karafka/connection/listeners_batch.rb +5 -11
- data/lib/karafka/connection/manager.rb +72 -0
- data/lib/karafka/connection/messages_buffer.rb +12 -0
- data/lib/karafka/connection/proxy.rb +17 -0
- data/lib/karafka/connection/status.rb +75 -0
- data/lib/karafka/contracts/config.rb +14 -10
- data/lib/karafka/contracts/consumer_group.rb +9 -1
- data/lib/karafka/contracts/topic.rb +3 -1
- data/lib/karafka/errors.rb +17 -0
- data/lib/karafka/instrumentation/logger_listener.rb +3 -0
- data/lib/karafka/instrumentation/notifications.rb +13 -5
- data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +31 -28
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +20 -1
- data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +15 -12
- data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +39 -36
- data/lib/karafka/pro/base_consumer.rb +47 -0
- data/lib/karafka/pro/connection/manager.rb +269 -0
- data/lib/karafka/pro/connection/multiplexing/listener.rb +40 -0
- data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
- data/lib/karafka/pro/iterator.rb +1 -6
- data/lib/karafka/pro/loader.rb +14 -0
- data/lib/karafka/pro/processing/coordinator.rb +2 -1
- data/lib/karafka/pro/processing/executor.rb +37 -0
- data/lib/karafka/pro/processing/expansions_selector.rb +32 -0
- data/lib/karafka/pro/processing/jobs/periodic.rb +41 -0
- data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +32 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +14 -3
- data/lib/karafka/pro/processing/offset_metadata/consumer.rb +44 -0
- data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +131 -0
- data/lib/karafka/pro/processing/offset_metadata/listener.rb +46 -0
- data/lib/karafka/pro/processing/schedulers/base.rb +39 -23
- data/lib/karafka/pro/processing/schedulers/default.rb +12 -14
- data/lib/karafka/pro/processing/strategies/default.rb +154 -1
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +39 -0
- data/lib/karafka/pro/processing/strategies/vp/default.rb +65 -25
- data/lib/karafka/pro/processing/virtual_offset_manager.rb +41 -11
- data/lib/karafka/pro/routing/features/long_running_job/topic.rb +2 -0
- data/lib/karafka/pro/routing/features/multiplexing/config.rb +38 -0
- data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +114 -0
- data/lib/karafka/pro/routing/features/multiplexing/patches/contracts/consumer_group.rb +42 -0
- data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +38 -0
- data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +42 -0
- data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +40 -0
- data/lib/karafka/pro/routing/features/multiplexing.rb +59 -0
- data/lib/karafka/pro/routing/features/non_blocking_job/topic.rb +32 -0
- data/lib/karafka/pro/routing/features/non_blocking_job.rb +37 -0
- data/lib/karafka/pro/routing/features/offset_metadata/config.rb +33 -0
- data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +42 -0
- data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +65 -0
- data/lib/karafka/pro/routing/features/offset_metadata.rb +40 -0
- data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +4 -0
- data/lib/karafka/pro/routing/features/patterns/detector.rb +18 -10
- data/lib/karafka/pro/routing/features/periodic_job/config.rb +37 -0
- data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +44 -0
- data/lib/karafka/pro/routing/features/periodic_job/topic.rb +94 -0
- data/lib/karafka/pro/routing/features/periodic_job.rb +27 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +7 -2
- data/lib/karafka/process.rb +5 -3
- data/lib/karafka/processing/coordinator.rb +5 -1
- data/lib/karafka/processing/executor.rb +16 -10
- data/lib/karafka/processing/executors_buffer.rb +19 -4
- data/lib/karafka/processing/schedulers/default.rb +3 -2
- data/lib/karafka/processing/strategies/default.rb +6 -0
- data/lib/karafka/processing/strategies/dlq.rb +36 -0
- data/lib/karafka/routing/builder.rb +12 -2
- data/lib/karafka/routing/consumer_group.rb +5 -5
- data/lib/karafka/routing/features/base.rb +44 -8
- data/lib/karafka/routing/features/dead_letter_queue/config.rb +6 -1
- data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
- data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -2
- data/lib/karafka/routing/subscription_group.rb +2 -2
- data/lib/karafka/routing/subscription_groups_builder.rb +11 -2
- data/lib/karafka/routing/topic.rb +8 -10
- data/lib/karafka/runner.rb +13 -3
- data/lib/karafka/server.rb +5 -9
- data/lib/karafka/setup/config.rb +17 -0
- data/lib/karafka/status.rb +23 -14
- data/lib/karafka/templates/karafka.rb.erb +7 -0
- data/lib/karafka/time_trackers/partition_usage.rb +56 -0
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +42 -10
- metadata.gz.sig +0 -0
- data/lib/karafka/connection/consumer_group_coordinator.rb +0 -48
|
@@ -25,7 +25,7 @@ module Karafka
|
|
|
25
25
|
# @param consumer [::Rdkafka::Consumer] consumer instance needed to talk with Kafka
|
|
26
26
|
# @param expanded_topics [Hash] hash with expanded and normalized topics data
|
|
27
27
|
def initialize(consumer, expanded_topics)
|
|
28
|
-
@consumer = Connection::Proxy.new(consumer)
|
|
28
|
+
@consumer = ::Karafka::Connection::Proxy.new(consumer)
|
|
29
29
|
@expanded_topics = expanded_topics
|
|
30
30
|
@mapped_topics = Hash.new { |h, k| h[k] = {} }
|
|
31
31
|
end
|
data/lib/karafka/pro/iterator.rb
CHANGED
|
@@ -22,11 +22,6 @@ module Karafka
|
|
|
22
22
|
#
|
|
23
23
|
# It does **not** create a consumer group and does not have any offset management.
|
|
24
24
|
class Iterator
|
|
25
|
-
# Local partition reference for librdkafka
|
|
26
|
-
Partition = Struct.new(:partition, :offset)
|
|
27
|
-
|
|
28
|
-
private_constant :Partition
|
|
29
|
-
|
|
30
25
|
# A simple API allowing to iterate over topic/partition data, without having to subscribe
|
|
31
26
|
# and deal with rebalances. This API allows for multi-partition streaming and is optimized
|
|
32
27
|
# for data lookups. It allows for explicit stopping iteration over any partition during
|
|
@@ -127,7 +122,7 @@ module Karafka
|
|
|
127
122
|
|
|
128
123
|
@current_consumer.pause(
|
|
129
124
|
Rdkafka::Consumer::TopicPartitionList.new(
|
|
130
|
-
name => [Partition.new(partition, 0)]
|
|
125
|
+
name => [Rdkafka::Consumer::Partition.new(partition, 0)]
|
|
131
126
|
)
|
|
132
127
|
)
|
|
133
128
|
end
|
data/lib/karafka/pro/loader.rb
CHANGED
|
@@ -53,6 +53,7 @@ module Karafka
|
|
|
53
53
|
features.each { |feature| feature.pre_setup(config) }
|
|
54
54
|
|
|
55
55
|
reconfigure(config)
|
|
56
|
+
expand
|
|
56
57
|
|
|
57
58
|
load_topic_features
|
|
58
59
|
end
|
|
@@ -82,12 +83,17 @@ module Karafka
|
|
|
82
83
|
|
|
83
84
|
icfg.cli.contract = Contracts::ServerCliOptions.new
|
|
84
85
|
|
|
86
|
+
# Use manager that supports multiplexing
|
|
87
|
+
icfg.connection.manager = Connection::Manager.new
|
|
88
|
+
|
|
85
89
|
icfg.processing.coordinator_class = Processing::Coordinator
|
|
86
90
|
icfg.processing.partitioner_class = Processing::Partitioner
|
|
87
91
|
icfg.processing.scheduler_class = Processing::Schedulers::Default
|
|
88
92
|
icfg.processing.jobs_queue_class = Processing::JobsQueue
|
|
93
|
+
icfg.processing.executor_class = Processing::Executor
|
|
89
94
|
icfg.processing.jobs_builder = Processing::JobsBuilder.new
|
|
90
95
|
icfg.processing.strategy_selector = Processing::StrategySelector.new
|
|
96
|
+
icfg.processing.expansions_selector = Processing::ExpansionsSelector.new
|
|
91
97
|
|
|
92
98
|
icfg.active_job.consumer_class = ActiveJob::Consumer
|
|
93
99
|
icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
|
|
@@ -96,6 +102,14 @@ module Karafka
|
|
|
96
102
|
config.monitor.subscribe(Instrumentation::PerformanceTracker.instance)
|
|
97
103
|
end
|
|
98
104
|
|
|
105
|
+
# Adds extra modules to certain classes
|
|
106
|
+
# This expands their functionalities with things that are needed when operating in Pro
|
|
107
|
+
# It is used only when given class is part of the end user API and cannot be swapped by
|
|
108
|
+
# a pluggable component
|
|
109
|
+
def expand
|
|
110
|
+
Karafka::BaseConsumer.include Pro::BaseConsumer
|
|
111
|
+
end
|
|
112
|
+
|
|
99
113
|
# Loads the Pro features of Karafka
|
|
100
114
|
# @note Object space lookup is not the fastest but we do it once during boot, so it's ok
|
|
101
115
|
def load_topic_features
|
|
@@ -42,7 +42,8 @@ module Karafka
|
|
|
42
42
|
|
|
43
43
|
@virtual_offset_manager = VirtualOffsetManager.new(
|
|
44
44
|
topic.name,
|
|
45
|
-
partition
|
|
45
|
+
partition,
|
|
46
|
+
topic.virtual_partitions.offset_metadata_strategy
|
|
46
47
|
)
|
|
47
48
|
|
|
48
49
|
# We register our own "internal" filter to support filtering of messages that were marked
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
7
|
+
# repository and their usage requires commercial license agreement.
|
|
8
|
+
#
|
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
10
|
+
#
|
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
12
|
+
# your code to Maciej Mensfeld.
|
|
13
|
+
|
|
14
|
+
module Karafka
|
|
15
|
+
module Pro
|
|
16
|
+
module Processing
|
|
17
|
+
# Pro executor that supports periodic jobs
|
|
18
|
+
class Executor < Karafka::Processing::Executor
|
|
19
|
+
# Runs the code that should happen before periodic job is scheduled
|
|
20
|
+
#
|
|
21
|
+
# @note While jobs are called `Periodic`, from the consumer perspective it is "ticking".
|
|
22
|
+
# This name was taken for a reason: we may want to introduce periodic ticking also not
|
|
23
|
+
# only during polling but for example on wait and a name "poll" would not align well.
|
|
24
|
+
# A name "periodic" is not a verb and our other consumer actions are verbs like:
|
|
25
|
+
# consume or revoked. So for the sake of consistency we have ticking here.
|
|
26
|
+
def before_schedule_periodic
|
|
27
|
+
consumer.on_before_schedule_tick
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
# Triggers consumer ticking
|
|
31
|
+
def periodic
|
|
32
|
+
consumer.on_tick
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
7
|
+
# repository and their usage requires commercial license agreement.
|
|
8
|
+
#
|
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
10
|
+
#
|
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
12
|
+
# your code to Maciej Mensfeld.
|
|
13
|
+
|
|
14
|
+
module Karafka
|
|
15
|
+
module Pro
|
|
16
|
+
module Processing
|
|
17
|
+
# Pro selector of appropriate topic setup based features enhancements.
|
|
18
|
+
class ExpansionsSelector < Karafka::Processing::ExpansionsSelector
|
|
19
|
+
# @param topic [Karafka::Routing::Topic] topic with settings based on which we find
|
|
20
|
+
# expansions
|
|
21
|
+
# @return [Array<Module>] modules with proper expansions we're supposed to use to enhance
|
|
22
|
+
# the consumer
|
|
23
|
+
def find(topic)
|
|
24
|
+
# Start with the non-pro expansions
|
|
25
|
+
expansions = super
|
|
26
|
+
expansions << Pro::Processing::OffsetMetadata::Consumer if topic.offset_metadata?
|
|
27
|
+
expansions
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
7
|
+
# repository and their usage requires commercial license agreement.
|
|
8
|
+
#
|
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
10
|
+
#
|
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
12
|
+
# your code to Maciej Mensfeld.
|
|
13
|
+
|
|
14
|
+
module Karafka
|
|
15
|
+
module Pro
|
|
16
|
+
module Processing
|
|
17
|
+
module Jobs
|
|
18
|
+
# Job that represents a "ticking" work. Work that we run periodically for the Periodics
|
|
19
|
+
# enabled topics.
|
|
20
|
+
class Periodic < ::Karafka::Processing::Jobs::Base
|
|
21
|
+
# @param executor [Karafka::Pro::Processing::Executor] pro executor that is supposed to
|
|
22
|
+
# run a given job
|
|
23
|
+
def initialize(executor)
|
|
24
|
+
@executor = executor
|
|
25
|
+
super()
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# Code executed before we schedule this job
|
|
29
|
+
def before_schedule
|
|
30
|
+
executor.before_schedule_periodic
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Runs the executor periodic action
|
|
34
|
+
def call
|
|
35
|
+
executor.periodic
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
7
|
+
# repository and their usage requires commercial license agreement.
|
|
8
|
+
#
|
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
10
|
+
#
|
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
12
|
+
# your code to Maciej Mensfeld.
|
|
13
|
+
|
|
14
|
+
module Karafka
|
|
15
|
+
module Pro
|
|
16
|
+
module Processing
|
|
17
|
+
module Jobs
|
|
18
|
+
# Non-Blocking version of the Periodic job
|
|
19
|
+
# We use this version for LRJ topics for cases where saturated resources would not allow
|
|
20
|
+
# to run this job for extended period of time. Under such scenarios, if we would not use
|
|
21
|
+
# a non-blocking one, we would reach max.poll.interval.ms.
|
|
22
|
+
class PeriodicNonBlocking < Periodic
|
|
23
|
+
# @param args [Array] any arguments accepted by `::Karafka::Pro::Processing::Jobs::Periodic`
|
|
24
|
+
def initialize(*args)
|
|
25
|
+
super
|
|
26
|
+
@non_blocking = true
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -16,12 +16,12 @@ module Karafka
|
|
|
16
16
|
module Processing
|
|
17
17
|
# Pro jobs builder that supports lrj
|
|
18
18
|
class JobsBuilder < ::Karafka::Processing::JobsBuilder
|
|
19
|
-
# @param executor [Karafka::Processing::Executor]
|
|
19
|
+
# @param executor [Karafka::Pro::Processing::Executor]
|
|
20
20
|
def idle(executor)
|
|
21
21
|
Karafka::Processing::Jobs::Idle.new(executor)
|
|
22
22
|
end
|
|
23
23
|
|
|
24
|
-
# @param executor [Karafka::Processing::Executor]
|
|
24
|
+
# @param executor [Karafka::Pro::Processing::Executor]
|
|
25
25
|
# @param messages [Karafka::Messages::Messages] messages batch to be consumed
|
|
26
26
|
# @return [Karafka::Processing::Jobs::Consume] blocking job
|
|
27
27
|
# @return [Karafka::Pro::Processing::Jobs::ConsumeNonBlocking] non blocking for lrj
|
|
@@ -33,7 +33,7 @@ module Karafka
|
|
|
33
33
|
end
|
|
34
34
|
end
|
|
35
35
|
|
|
36
|
-
# @param executor [Karafka::Processing::Executor]
|
|
36
|
+
# @param executor [Karafka::Pro::Processing::Executor]
|
|
37
37
|
# @return [Karafka::Processing::Jobs::Revoked] revocation job for non LRJ
|
|
38
38
|
# @return [Karafka::Processing::Jobs::RevokedNonBlocking] revocation job that is
|
|
39
39
|
# non-blocking, so when revocation job is scheduled for LRJ it also will not block
|
|
@@ -44,6 +44,17 @@ module Karafka
|
|
|
44
44
|
super
|
|
45
45
|
end
|
|
46
46
|
end
|
|
47
|
+
|
|
48
|
+
# @param executor [Karafka::Pro::Processing::Executor]
|
|
49
|
+
# @return [Jobs::Periodic] Periodic job
|
|
50
|
+
# @return [Jobs::PeriodicNonBlocking] Periodic non-blocking job
|
|
51
|
+
def periodic(executor)
|
|
52
|
+
if executor.topic.long_running_job?
|
|
53
|
+
Jobs::PeriodicNonBlocking.new(executor)
|
|
54
|
+
else
|
|
55
|
+
Jobs::Periodic.new(executor)
|
|
56
|
+
end
|
|
57
|
+
end
|
|
47
58
|
end
|
|
48
59
|
end
|
|
49
60
|
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
7
|
+
# repository and their usage requires commercial license agreement.
|
|
8
|
+
#
|
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
10
|
+
#
|
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
12
|
+
# your code to Maciej Mensfeld.
|
|
13
|
+
|
|
14
|
+
module Karafka
|
|
15
|
+
module Pro
|
|
16
|
+
module Processing
|
|
17
|
+
# Offset Metadata support on the processing side
|
|
18
|
+
module OffsetMetadata
|
|
19
|
+
# Extra API methods for offset metadata fetching
|
|
20
|
+
# @note Part of this feature API is embedded directly into the strategies because it alters
|
|
21
|
+
# how marking methods (`#mark_as_consumed` and `#mark_as_consumed!`) operate. Because
|
|
22
|
+
# of that, they had to be embedded into the strategies.
|
|
23
|
+
module Consumer
|
|
24
|
+
# @param cache [Boolean] should we use cached result if present (true by default)
|
|
25
|
+
# @return [false, Object] false in case we do not own the partition anymore or
|
|
26
|
+
# deserialized metadata based on the deserializer
|
|
27
|
+
# @note Caching is on as the assumption here is, that most of the time user will be
|
|
28
|
+
# interested only in the offset metadata that "came" from the time prior to the
|
|
29
|
+
# rebalance. That is because the rest of the metadata (current) is created and
|
|
30
|
+
# controlled by the user himself, thus there is no need to retrieve it. In case this
|
|
31
|
+
# is not true and user wants to always get the Kafka metadata, `cache` value of this
|
|
32
|
+
# feature can be set to false.
|
|
33
|
+
def offset_metadata(cache: true)
|
|
34
|
+
return false if revoked?
|
|
35
|
+
|
|
36
|
+
Fetcher.find(topic, partition, cache: cache)
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
alias committed_offset_metadata offset_metadata
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
7
|
+
# repository and their usage requires commercial license agreement.
|
|
8
|
+
#
|
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
10
|
+
#
|
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
12
|
+
# your code to Maciej Mensfeld.
|
|
13
|
+
|
|
14
|
+
module Karafka
|
|
15
|
+
module Pro
|
|
16
|
+
module Processing
|
|
17
|
+
# Offset Metadata support on the processing side
|
|
18
|
+
module OffsetMetadata
|
|
19
|
+
# This fetcher is responsible for fetching and caching committed offsets metadata
|
|
20
|
+
# information.
|
|
21
|
+
#
|
|
22
|
+
# By design we fetch all information for the requested topic's assignments. Not all topics from
|
|
23
|
+
# the same subscription group may need metadata and even if they do, we can run a few smaller
|
|
24
|
+
# queries. This approach prevents us from querying all assigned topics data in one go
|
|
25
|
+
# preventing excessive queries.
|
|
26
|
+
#
|
|
27
|
+
# Since the assumption is, that user will not have to reach out for the later metadata
|
|
28
|
+
# since it is produced in the context of a given consumer assignment, we can cache the
|
|
29
|
+
# initial result and only allow users for explicit invalidation.
|
|
30
|
+
class Fetcher
|
|
31
|
+
include Singleton
|
|
32
|
+
|
|
33
|
+
class << self
|
|
34
|
+
extend Forwardable
|
|
35
|
+
|
|
36
|
+
def_delegators :instance, :register, :clear, :find
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def initialize
|
|
40
|
+
@mutexes = {}
|
|
41
|
+
@clients = {}
|
|
42
|
+
@tpls = {}
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Registers a client of a given subscription group, so we can use it for queries later on
|
|
46
|
+
# @param client [Karafka::Connection::Client]
|
|
47
|
+
# @note Since we store the client reference and not the underlying rdkafka consumer
|
|
48
|
+
# instance, we do not have to deal with the recovery as it is abstracted away
|
|
49
|
+
def register(client)
|
|
50
|
+
@clients[client.subscription_group] = client
|
|
51
|
+
# We use one mutex per SG because independent SGs can query in parallel
|
|
52
|
+
@mutexes[client.subscription_group] = Mutex.new
|
|
53
|
+
@tpls[client.subscription_group] = {}
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# Queries or retrieves from cache the given offset metadata for the selected partition
|
|
57
|
+
#
|
|
58
|
+
# @param topic [Karafka::Routing::Topic] routing topic with subscription group reference
|
|
59
|
+
# @param partition [Integer] partition for which we want to get stored offset metadata
|
|
60
|
+
# @param cache [Boolean] forces explicit query to Kafka when false and cache refresh.
|
|
61
|
+
# By default we use the setting from the topic level but this can be overwritten on
|
|
62
|
+
# a per request basis if needed.
|
|
63
|
+
# @return [Object, false] deserialized metadata (string deserializer by default) or
|
|
64
|
+
# false in case we were not able to obtain the details because we have lost the
|
|
65
|
+
# assignment
|
|
66
|
+
def find(topic, partition, cache: true)
|
|
67
|
+
cache = topic.offset_metadata.cache? && cache
|
|
68
|
+
|
|
69
|
+
tpls = fetch(topic, cache)
|
|
70
|
+
|
|
71
|
+
return false unless tpls
|
|
72
|
+
|
|
73
|
+
t_partitions = tpls.fetch(topic.name, [])
|
|
74
|
+
t_partition = t_partitions.find { |t_p| t_p.partition == partition }
|
|
75
|
+
|
|
76
|
+
# If we do not have given topic partition here, it means it is no longer part of our
|
|
77
|
+
# assignment and we should return false
|
|
78
|
+
return false unless t_partition
|
|
79
|
+
|
|
80
|
+
topic.offset_metadata.deserializer.call(t_partition.metadata)
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
# Clears cache of a given subscription group. It is triggered on assignment changes.
|
|
84
|
+
#
|
|
85
|
+
# @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group that
|
|
86
|
+
# we want to clear.
|
|
87
|
+
def clear(subscription_group)
|
|
88
|
+
@mutexes.fetch(subscription_group).synchronize do
|
|
89
|
+
@tpls[subscription_group].clear
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
private
|
|
94
|
+
|
|
95
|
+
# Fetches from Kafka all committed offsets for the given topic partitions that are
|
|
96
|
+
# assigned to this process.
|
|
97
|
+
#
|
|
98
|
+
# We fetch all because in majority of the cases, the behavior of the end user code is
|
|
99
|
+
# not specific to a given partition but the same for all. In such cases we save on
|
|
100
|
+
# querying as we get all data for all partitions in one go.
|
|
101
|
+
#
|
|
102
|
+
# @param topic [Karafka::Routing::Topic] topic for which we want to fetch tpls data
|
|
103
|
+
# @param cache [Boolean] should we return cached data if present
|
|
104
|
+
def fetch(topic, cache)
|
|
105
|
+
subscription_group = topic.subscription_group
|
|
106
|
+
t_tpls = @tpls.fetch(subscription_group, false)
|
|
107
|
+
t_tpl = t_tpls[topic]
|
|
108
|
+
|
|
109
|
+
return t_tpl if t_tpl && cache
|
|
110
|
+
|
|
111
|
+
assigned_tpls = @clients.fetch(subscription_group).assignment
|
|
112
|
+
t_tpl = assigned_tpls.to_h.fetch(topic.name, false)
|
|
113
|
+
|
|
114
|
+
# May be false in case we lost given assignment but still run LRJ
|
|
115
|
+
return false unless t_tpl
|
|
116
|
+
return false if t_tpl.empty?
|
|
117
|
+
|
|
118
|
+
@mutexes.fetch(subscription_group).synchronize do
|
|
119
|
+
rd_tpl = Rdkafka::Consumer::TopicPartitionList.new(topic.name => t_tpl)
|
|
120
|
+
|
|
121
|
+
# While in theory we could lose the assignment while being here, this will work and will
|
|
122
|
+
# return us proper tpl, we do not deal with this case on this layer and report anyhow
|
|
123
|
+
# There will not be any exception and this will operate correctly
|
|
124
|
+
t_tpls[topic] = @clients.fetch(subscription_group).committed(rd_tpl).to_h
|
|
125
|
+
end
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
end
|
|
130
|
+
end
|
|
131
|
+
end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
7
|
+
# repository and their usage requires commercial license agreement.
|
|
8
|
+
#
|
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
10
|
+
#
|
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
12
|
+
# your code to Maciej Mensfeld.
|
|
13
|
+
|
|
14
|
+
module Karafka
|
|
15
|
+
module Pro
|
|
16
|
+
module Processing
|
|
17
|
+
module OffsetMetadata
|
|
18
|
+
# Keeps track of rebalances and updates the fetcher
|
|
19
|
+
# Since we cache the tpls with metadata, we need to invalidate them on events that would
|
|
20
|
+
# cause changes in the assignments
|
|
21
|
+
class Listener
|
|
22
|
+
# When we start listening we need to register this client in the metadata fetcher, so
|
|
23
|
+
# we have the client related to a given subscription group that we can use in fetcher
|
|
24
|
+
# since fetcher may be used in filtering API and other places outside of the standard
|
|
25
|
+
# consumer flow
|
|
26
|
+
# @param event [Karafka::Core::Monitoring::Event]
|
|
27
|
+
def on_connection_listener_before_fetch_loop(event)
|
|
28
|
+
Fetcher.register event[:client]
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
# Invalidates internal cache when assignments change so we can get correct metadata
|
|
32
|
+
# @param event [Karafka::Core::Monitoring::Event]
|
|
33
|
+
def on_rebalance_partitions_assigned(event)
|
|
34
|
+
Fetcher.clear event[:subscription_group]
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# Invalidates internal cache when assignments change so we can get correct metadata
|
|
38
|
+
# @param event [Karafka::Core::Monitoring::Event]
|
|
39
|
+
def on_rebalance_partitions_revoked(event)
|
|
40
|
+
Fetcher.clear event[:subscription_group]
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
@@ -31,9 +31,19 @@ module Karafka
|
|
|
31
31
|
@mutex = Mutex.new
|
|
32
32
|
end
|
|
33
33
|
|
|
34
|
+
# Schedules any jobs provided in a fifo order
|
|
35
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>]
|
|
36
|
+
def schedule_fifo(jobs_array)
|
|
37
|
+
jobs_array.each do |job|
|
|
38
|
+
@queue << job
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
|
|
34
42
|
# Runs the consumption jobs scheduling flow under a mutex
|
|
35
43
|
#
|
|
36
|
-
# @param jobs_array
|
|
44
|
+
# @param jobs_array
|
|
45
|
+
# [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
|
|
46
|
+
# jobs for scheduling
|
|
37
47
|
def on_schedule_consumption(jobs_array)
|
|
38
48
|
@mutex.synchronize do
|
|
39
49
|
schedule_consumption(jobs_array)
|
|
@@ -42,53 +52,59 @@ module Karafka
|
|
|
42
52
|
|
|
43
53
|
# Should schedule the consumption jobs
|
|
44
54
|
#
|
|
45
|
-
# @param _jobs_array
|
|
55
|
+
# @param _jobs_array
|
|
56
|
+
# [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
|
|
57
|
+
# jobs for scheduling
|
|
46
58
|
def schedule_consumption(_jobs_array)
|
|
47
59
|
raise NotImplementedError, 'Implement in a subclass'
|
|
48
60
|
end
|
|
49
61
|
|
|
50
62
|
# Runs the revocation jobs scheduling flow under a mutex
|
|
51
63
|
#
|
|
52
|
-
# @param jobs_array
|
|
64
|
+
# @param jobs_array
|
|
65
|
+
# [Array<Karafka::Processing::Jobs::Revoked, Processing::Jobs::RevokedNonBlocking>]
|
|
66
|
+
# jobs for scheduling
|
|
53
67
|
def on_schedule_revocation(jobs_array)
|
|
54
68
|
@mutex.synchronize do
|
|
55
69
|
schedule_revocation(jobs_array)
|
|
56
70
|
end
|
|
57
71
|
end
|
|
58
72
|
|
|
59
|
-
# Schedules the revocation jobs.
|
|
60
|
-
#
|
|
61
|
-
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
|
|
62
|
-
#
|
|
63
|
-
# @note We provide a default scheduler logic here because by default revocation jobs
|
|
64
|
-
# should be scheduled as fast as possible.
|
|
65
|
-
def schedule_revocation(jobs_array)
|
|
66
|
-
jobs_array.each do |job|
|
|
67
|
-
@queue << job
|
|
68
|
-
end
|
|
69
|
-
end
|
|
70
|
-
|
|
71
73
|
# Runs the shutdown jobs scheduling flow under a mutex
|
|
72
74
|
#
|
|
73
|
-
# @param jobs_array [Array<Karafka::Processing::Jobs::
|
|
75
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Shutdown>] jobs for scheduling
|
|
74
76
|
def on_schedule_shutdown(jobs_array)
|
|
75
77
|
@mutex.synchronize do
|
|
76
78
|
schedule_shutdown(jobs_array)
|
|
77
79
|
end
|
|
78
80
|
end
|
|
79
81
|
|
|
80
|
-
#
|
|
82
|
+
# Runs the idle jobs scheduling flow under a mutex
|
|
81
83
|
#
|
|
82
|
-
# @param jobs_array [Array<Karafka::Processing::Jobs::
|
|
84
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Idle>] jobs for scheduling
|
|
85
|
+
def on_schedule_idle(jobs_array)
|
|
86
|
+
@mutex.synchronize do
|
|
87
|
+
schedule_idle(jobs_array)
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
# Runs the periodic jobs scheduling flow under a mutex
|
|
83
92
|
#
|
|
84
|
-
# @
|
|
85
|
-
#
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
93
|
+
# @param jobs_array
|
|
94
|
+
# [Array<Processing::Jobs::Periodic, Processing::Jobs::PeriodicNonBlocking>]
|
|
95
|
+
# jobs for scheduling
|
|
96
|
+
def on_schedule_periodic(jobs_array)
|
|
97
|
+
@mutex.synchronize do
|
|
98
|
+
schedule_periodic(jobs_array)
|
|
89
99
|
end
|
|
90
100
|
end
|
|
91
101
|
|
|
102
|
+
# Schedule by default all except consumption as fifo
|
|
103
|
+
alias schedule_revocation schedule_fifo
|
|
104
|
+
alias schedule_shutdown schedule_fifo
|
|
105
|
+
alias schedule_idle schedule_fifo
|
|
106
|
+
alias schedule_periodic schedule_fifo
|
|
107
|
+
|
|
92
108
|
# Runs the manage tick under mutex
|
|
93
109
|
def on_manage
|
|
94
110
|
@mutex.synchronize { manage }
|
|
@@ -31,7 +31,9 @@ module Karafka
|
|
|
31
31
|
class Default < Base
|
|
32
32
|
# Schedules jobs in the LJF order for consumption
|
|
33
33
|
#
|
|
34
|
-
# @param jobs_array
|
|
34
|
+
# @param jobs_array
|
|
35
|
+
# [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
|
|
36
|
+
# jobs for scheduling
|
|
35
37
|
def on_schedule_consumption(jobs_array)
|
|
36
38
|
perf_tracker = Instrumentation::PerformanceTracker.instance
|
|
37
39
|
|
|
@@ -53,23 +55,19 @@ module Karafka
|
|
|
53
55
|
end
|
|
54
56
|
end
|
|
55
57
|
|
|
56
|
-
# Schedules jobs in
|
|
57
|
-
#
|
|
58
|
-
|
|
59
|
-
def on_schedule_revocation(jobs_array)
|
|
58
|
+
# Schedules any jobs provided in a fifo order
|
|
59
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>]
|
|
60
|
+
def schedule_fifo(jobs_array)
|
|
60
61
|
jobs_array.each do |job|
|
|
61
62
|
@queue << job
|
|
62
63
|
end
|
|
63
64
|
end
|
|
64
65
|
|
|
65
|
-
#
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
@queue << job
|
|
71
|
-
end
|
|
72
|
-
end
|
|
66
|
+
# By default all non-consumption work is scheduled in a fifo order
|
|
67
|
+
alias on_schedule_revocation schedule_fifo
|
|
68
|
+
alias on_schedule_shutdown schedule_fifo
|
|
69
|
+
alias on_schedule_idle schedule_fifo
|
|
70
|
+
alias on_schedule_periodic schedule_fifo
|
|
73
71
|
|
|
74
72
|
# This scheduler does not have anything to manage as it is a pass through and has no
|
|
75
73
|
# state
|
|
@@ -87,7 +85,7 @@ module Karafka
|
|
|
87
85
|
private
|
|
88
86
|
|
|
89
87
|
# @param perf_tracker [PerformanceTracker]
|
|
90
|
-
# @param job [Karafka::Processing::Jobs::
|
|
88
|
+
# @param job [Karafka::Processing::Jobs::Consume] job we will be processing
|
|
91
89
|
# @return [Numeric] estimated cost of processing this job
|
|
92
90
|
def processing_cost(perf_tracker, job)
|
|
93
91
|
if job.is_a?(::Karafka::Processing::Jobs::Consume)
|