karafka 2.2.14 → 2.3.0.alpha2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +38 -12
- data/.ruby-version +1 -1
- data/CHANGELOG.md +24 -0
- data/Gemfile.lock +16 -16
- data/README.md +0 -2
- data/SECURITY.md +23 -0
- data/bin/integrations +1 -1
- data/config/locales/errors.yml +7 -1
- data/config/locales/pro_errors.yml +22 -0
- data/docker-compose.yml +1 -1
- data/karafka.gemspec +2 -2
- data/lib/karafka/admin/acl.rb +287 -0
- data/lib/karafka/admin.rb +9 -13
- data/lib/karafka/app.rb +5 -3
- data/lib/karafka/base_consumer.rb +9 -1
- data/lib/karafka/cli/base.rb +1 -1
- data/lib/karafka/connection/client.rb +83 -76
- data/lib/karafka/connection/conductor.rb +28 -0
- data/lib/karafka/connection/listener.rb +159 -42
- data/lib/karafka/connection/listeners_batch.rb +5 -11
- data/lib/karafka/connection/manager.rb +72 -0
- data/lib/karafka/connection/messages_buffer.rb +12 -0
- data/lib/karafka/connection/proxy.rb +17 -0
- data/lib/karafka/connection/status.rb +75 -0
- data/lib/karafka/contracts/config.rb +14 -10
- data/lib/karafka/contracts/consumer_group.rb +9 -1
- data/lib/karafka/contracts/topic.rb +3 -1
- data/lib/karafka/errors.rb +17 -0
- data/lib/karafka/instrumentation/logger_listener.rb +3 -0
- data/lib/karafka/instrumentation/notifications.rb +13 -5
- data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +31 -28
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +20 -1
- data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +15 -12
- data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +39 -36
- data/lib/karafka/pro/base_consumer.rb +47 -0
- data/lib/karafka/pro/connection/manager.rb +269 -0
- data/lib/karafka/pro/connection/multiplexing/listener.rb +40 -0
- data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
- data/lib/karafka/pro/iterator.rb +1 -6
- data/lib/karafka/pro/loader.rb +14 -0
- data/lib/karafka/pro/processing/coordinator.rb +2 -1
- data/lib/karafka/pro/processing/executor.rb +37 -0
- data/lib/karafka/pro/processing/expansions_selector.rb +32 -0
- data/lib/karafka/pro/processing/jobs/periodic.rb +41 -0
- data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +32 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +14 -3
- data/lib/karafka/pro/processing/offset_metadata/consumer.rb +44 -0
- data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +131 -0
- data/lib/karafka/pro/processing/offset_metadata/listener.rb +46 -0
- data/lib/karafka/pro/processing/schedulers/base.rb +39 -23
- data/lib/karafka/pro/processing/schedulers/default.rb +12 -14
- data/lib/karafka/pro/processing/strategies/default.rb +154 -1
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +39 -0
- data/lib/karafka/pro/processing/strategies/vp/default.rb +65 -25
- data/lib/karafka/pro/processing/virtual_offset_manager.rb +41 -11
- data/lib/karafka/pro/routing/features/long_running_job/topic.rb +2 -0
- data/lib/karafka/pro/routing/features/multiplexing/config.rb +38 -0
- data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +114 -0
- data/lib/karafka/pro/routing/features/multiplexing/patches/contracts/consumer_group.rb +42 -0
- data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +38 -0
- data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +42 -0
- data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +40 -0
- data/lib/karafka/pro/routing/features/multiplexing.rb +59 -0
- data/lib/karafka/pro/routing/features/non_blocking_job/topic.rb +32 -0
- data/lib/karafka/pro/routing/features/non_blocking_job.rb +37 -0
- data/lib/karafka/pro/routing/features/offset_metadata/config.rb +33 -0
- data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +42 -0
- data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +65 -0
- data/lib/karafka/pro/routing/features/offset_metadata.rb +40 -0
- data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +4 -0
- data/lib/karafka/pro/routing/features/patterns/detector.rb +18 -10
- data/lib/karafka/pro/routing/features/periodic_job/config.rb +37 -0
- data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +44 -0
- data/lib/karafka/pro/routing/features/periodic_job/topic.rb +94 -0
- data/lib/karafka/pro/routing/features/periodic_job.rb +27 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +7 -2
- data/lib/karafka/process.rb +5 -3
- data/lib/karafka/processing/coordinator.rb +5 -1
- data/lib/karafka/processing/executor.rb +16 -10
- data/lib/karafka/processing/executors_buffer.rb +19 -4
- data/lib/karafka/processing/schedulers/default.rb +3 -2
- data/lib/karafka/processing/strategies/default.rb +6 -0
- data/lib/karafka/processing/strategies/dlq.rb +36 -0
- data/lib/karafka/routing/builder.rb +12 -2
- data/lib/karafka/routing/consumer_group.rb +5 -5
- data/lib/karafka/routing/features/base.rb +44 -8
- data/lib/karafka/routing/features/dead_letter_queue/config.rb +6 -1
- data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
- data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -2
- data/lib/karafka/routing/subscription_group.rb +2 -2
- data/lib/karafka/routing/subscription_groups_builder.rb +11 -2
- data/lib/karafka/routing/topic.rb +8 -10
- data/lib/karafka/runner.rb +13 -3
- data/lib/karafka/server.rb +5 -9
- data/lib/karafka/setup/config.rb +17 -0
- data/lib/karafka/status.rb +23 -14
- data/lib/karafka/templates/karafka.rb.erb +7 -0
- data/lib/karafka/time_trackers/partition_usage.rb +56 -0
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +42 -10
- metadata.gz.sig +0 -0
- data/lib/karafka/connection/consumer_group_coordinator.rb +0 -48
@@ -25,7 +25,7 @@ module Karafka
|
|
25
25
|
# @param consumer [::Rdkafka::Consumer] consumer instance needed to talk with Kafka
|
26
26
|
# @param expanded_topics [Hash] hash with expanded and normalized topics data
|
27
27
|
def initialize(consumer, expanded_topics)
|
28
|
-
@consumer = Connection::Proxy.new(consumer)
|
28
|
+
@consumer = ::Karafka::Connection::Proxy.new(consumer)
|
29
29
|
@expanded_topics = expanded_topics
|
30
30
|
@mapped_topics = Hash.new { |h, k| h[k] = {} }
|
31
31
|
end
|
data/lib/karafka/pro/iterator.rb
CHANGED
@@ -22,11 +22,6 @@ module Karafka
|
|
22
22
|
#
|
23
23
|
# It does **not** create a consumer group and does not have any offset management.
|
24
24
|
class Iterator
|
25
|
-
# Local partition reference for librdkafka
|
26
|
-
Partition = Struct.new(:partition, :offset)
|
27
|
-
|
28
|
-
private_constant :Partition
|
29
|
-
|
30
25
|
# A simple API allowing to iterate over topic/partition data, without having to subscribe
|
31
26
|
# and deal with rebalances. This API allows for multi-partition streaming and is optimized
|
32
27
|
# for data lookups. It allows for explicit stopping iteration over any partition during
|
@@ -127,7 +122,7 @@ module Karafka
|
|
127
122
|
|
128
123
|
@current_consumer.pause(
|
129
124
|
Rdkafka::Consumer::TopicPartitionList.new(
|
130
|
-
name => [Partition.new(partition, 0)]
|
125
|
+
name => [Rdkafka::Consumer::Partition.new(partition, 0)]
|
131
126
|
)
|
132
127
|
)
|
133
128
|
end
|
data/lib/karafka/pro/loader.rb
CHANGED
@@ -53,6 +53,7 @@ module Karafka
|
|
53
53
|
features.each { |feature| feature.pre_setup(config) }
|
54
54
|
|
55
55
|
reconfigure(config)
|
56
|
+
expand
|
56
57
|
|
57
58
|
load_topic_features
|
58
59
|
end
|
@@ -82,12 +83,17 @@ module Karafka
|
|
82
83
|
|
83
84
|
icfg.cli.contract = Contracts::ServerCliOptions.new
|
84
85
|
|
86
|
+
# Use manager that supports multiplexing
|
87
|
+
icfg.connection.manager = Connection::Manager.new
|
88
|
+
|
85
89
|
icfg.processing.coordinator_class = Processing::Coordinator
|
86
90
|
icfg.processing.partitioner_class = Processing::Partitioner
|
87
91
|
icfg.processing.scheduler_class = Processing::Schedulers::Default
|
88
92
|
icfg.processing.jobs_queue_class = Processing::JobsQueue
|
93
|
+
icfg.processing.executor_class = Processing::Executor
|
89
94
|
icfg.processing.jobs_builder = Processing::JobsBuilder.new
|
90
95
|
icfg.processing.strategy_selector = Processing::StrategySelector.new
|
96
|
+
icfg.processing.expansions_selector = Processing::ExpansionsSelector.new
|
91
97
|
|
92
98
|
icfg.active_job.consumer_class = ActiveJob::Consumer
|
93
99
|
icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
|
@@ -96,6 +102,14 @@ module Karafka
|
|
96
102
|
config.monitor.subscribe(Instrumentation::PerformanceTracker.instance)
|
97
103
|
end
|
98
104
|
|
105
|
+
# Adds extra modules to certain classes
|
106
|
+
# This expands their functionalities with things that are needed when operating in Pro
|
107
|
+
# It is used only when given class is part of the end user API and cannot be swapped by
|
108
|
+
# a pluggable component
|
109
|
+
def expand
|
110
|
+
Karafka::BaseConsumer.include Pro::BaseConsumer
|
111
|
+
end
|
112
|
+
|
99
113
|
# Loads the Pro features of Karafka
|
100
114
|
# @note Object space lookup is not the fastest but we do it once during boot, so it's ok
|
101
115
|
def load_topic_features
|
@@ -42,7 +42,8 @@ module Karafka
|
|
42
42
|
|
43
43
|
@virtual_offset_manager = VirtualOffsetManager.new(
|
44
44
|
topic.name,
|
45
|
-
partition
|
45
|
+
partition,
|
46
|
+
topic.virtual_partitions.offset_metadata_strategy
|
46
47
|
)
|
47
48
|
|
48
49
|
# We register our own "internal" filter to support filtering of messages that were marked
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
7
|
+
# repository and their usage requires commercial license agreement.
|
8
|
+
#
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
10
|
+
#
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
12
|
+
# your code to Maciej Mensfeld.
|
13
|
+
|
14
|
+
module Karafka
|
15
|
+
module Pro
|
16
|
+
module Processing
|
17
|
+
# Pro executor that supports periodic jobs
|
18
|
+
class Executor < Karafka::Processing::Executor
|
19
|
+
# Runs the code that should happen before periodic job is scheduled
|
20
|
+
#
|
21
|
+
# @note While jobs are called `Periodic`, from the consumer perspective it is "ticking".
|
22
|
+
# This name was taken for a reason: we may want to introduce periodic ticking also not
|
23
|
+
# only during polling but for example on wait and a name "poll" would not align well.
|
24
|
+
# A name "periodic" is not a verb and our other consumer actions are verbs like:
|
25
|
+
# consume or revoked. So for the sake of consistency we have ticking here.
|
26
|
+
def before_schedule_periodic
|
27
|
+
consumer.on_before_schedule_tick
|
28
|
+
end
|
29
|
+
|
30
|
+
# Triggers consumer ticking
|
31
|
+
def periodic
|
32
|
+
consumer.on_tick
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
7
|
+
# repository and their usage requires commercial license agreement.
|
8
|
+
#
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
10
|
+
#
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
12
|
+
# your code to Maciej Mensfeld.
|
13
|
+
|
14
|
+
module Karafka
|
15
|
+
module Pro
|
16
|
+
module Processing
|
17
|
+
# Pro selector of appropriate topic setup based features enhancements.
|
18
|
+
class ExpansionsSelector < Karafka::Processing::ExpansionsSelector
|
19
|
+
# @param topic [Karafka::Routing::Topic] topic with settings based on which we find
|
20
|
+
# expansions
|
21
|
+
# @return [Array<Module>] modules with proper expansions we're supposed to use to enhance
|
22
|
+
# the consumer
|
23
|
+
def find(topic)
|
24
|
+
# Start with the non-pro expansions
|
25
|
+
expansions = super
|
26
|
+
expansions << Pro::Processing::OffsetMetadata::Consumer if topic.offset_metadata?
|
27
|
+
expansions
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
7
|
+
# repository and their usage requires commercial license agreement.
|
8
|
+
#
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
10
|
+
#
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
12
|
+
# your code to Maciej Mensfeld.
|
13
|
+
|
14
|
+
module Karafka
|
15
|
+
module Pro
|
16
|
+
module Processing
|
17
|
+
module Jobs
|
18
|
+
# Job that represents a "ticking" work. Work that we run periodically for the Periodics
|
19
|
+
# enabled topics.
|
20
|
+
class Periodic < ::Karafka::Processing::Jobs::Base
|
21
|
+
# @param executor [Karafka::Pro::Processing::Executor] pro executor that is supposed to
|
22
|
+
# run a given job
|
23
|
+
def initialize(executor)
|
24
|
+
@executor = executor
|
25
|
+
super()
|
26
|
+
end
|
27
|
+
|
28
|
+
# Code executed before we schedule this job
|
29
|
+
def before_schedule
|
30
|
+
executor.before_schedule_periodic
|
31
|
+
end
|
32
|
+
|
33
|
+
# Runs the executor periodic action
|
34
|
+
def call
|
35
|
+
executor.periodic
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
7
|
+
# repository and their usage requires commercial license agreement.
|
8
|
+
#
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
10
|
+
#
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
12
|
+
# your code to Maciej Mensfeld.
|
13
|
+
|
14
|
+
module Karafka
|
15
|
+
module Pro
|
16
|
+
module Processing
|
17
|
+
module Jobs
|
18
|
+
# Non-Blocking version of the Periodic job
|
19
|
+
# We use this version for LRJ topics for cases where saturated resources would not allow
|
20
|
+
# to run this job for extended period of time. Under such scenarios, if we would not use
|
21
|
+
# a non-blocking one, we would reach max.poll.interval.ms.
|
22
|
+
class PeriodicNonBlocking < Periodic
|
23
|
+
# @param args [Array] any arguments accepted by `::Karafka::Pro::Processing::Jobs::Periodic`
|
24
|
+
def initialize(*args)
|
25
|
+
super
|
26
|
+
@non_blocking = true
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -16,12 +16,12 @@ module Karafka
|
|
16
16
|
module Processing
|
17
17
|
# Pro jobs builder that supports lrj
|
18
18
|
class JobsBuilder < ::Karafka::Processing::JobsBuilder
|
19
|
-
# @param executor [Karafka::Processing::Executor]
|
19
|
+
# @param executor [Karafka::Pro::Processing::Executor]
|
20
20
|
def idle(executor)
|
21
21
|
Karafka::Processing::Jobs::Idle.new(executor)
|
22
22
|
end
|
23
23
|
|
24
|
-
# @param executor [Karafka::Processing::Executor]
|
24
|
+
# @param executor [Karafka::Pro::Processing::Executor]
|
25
25
|
# @param messages [Karafka::Messages::Messages] messages batch to be consumed
|
26
26
|
# @return [Karafka::Processing::Jobs::Consume] blocking job
|
27
27
|
# @return [Karafka::Pro::Processing::Jobs::ConsumeNonBlocking] non blocking for lrj
|
@@ -33,7 +33,7 @@ module Karafka
|
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
36
|
-
# @param executor [Karafka::Processing::Executor]
|
36
|
+
# @param executor [Karafka::Pro::Processing::Executor]
|
37
37
|
# @return [Karafka::Processing::Jobs::Revoked] revocation job for non LRJ
|
38
38
|
# @return [Karafka::Processing::Jobs::RevokedNonBlocking] revocation job that is
|
39
39
|
# non-blocking, so when revocation job is scheduled for LRJ it also will not block
|
@@ -44,6 +44,17 @@ module Karafka
|
|
44
44
|
super
|
45
45
|
end
|
46
46
|
end
|
47
|
+
|
48
|
+
# @param executor [Karafka::Pro::Processing::Executor]
|
49
|
+
# @return [Jobs::Periodic] Periodic job
|
50
|
+
# @return [Jobs::PeriodicNonBlocking] Periodic non-blocking job
|
51
|
+
def periodic(executor)
|
52
|
+
if executor.topic.long_running_job?
|
53
|
+
Jobs::PeriodicNonBlocking.new(executor)
|
54
|
+
else
|
55
|
+
Jobs::Periodic.new(executor)
|
56
|
+
end
|
57
|
+
end
|
47
58
|
end
|
48
59
|
end
|
49
60
|
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
7
|
+
# repository and their usage requires commercial license agreement.
|
8
|
+
#
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
10
|
+
#
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
12
|
+
# your code to Maciej Mensfeld.
|
13
|
+
|
14
|
+
module Karafka
|
15
|
+
module Pro
|
16
|
+
module Processing
|
17
|
+
# Offset Metadata support on the processing side
|
18
|
+
module OffsetMetadata
|
19
|
+
# Extra API methods for offset metadata fetching
|
20
|
+
# @note Part of this feature API is embedded directly into the strategies because it alters
|
21
|
+
# how marking methods (`#mark_as_consumed` and `#mark_as_consumed!`) operate. Because
|
22
|
+
# of that, they had to be embedded into the strategies.
|
23
|
+
module Consumer
|
24
|
+
# @param cache [Boolean] should we use cached result if present (true by default)
|
25
|
+
# @return [false, Object] false in case we do not own the partition anymore or
|
26
|
+
# deserialized metadata based on the deserializer
|
27
|
+
# @note Caching is on as the assumption here is, that most of the time user will be
|
28
|
+
# interested only in the offset metadata that "came" from the time prior to the
|
29
|
+
# rebalance. That is because the rest of the metadata (current) is created and
|
30
|
+
# controlled by the user himself, thus there is no need to retrieve it. In case this
|
31
|
+
# is not true and user wants to always get the Kafka metadata, `cache` value of this
|
32
|
+
# feature can be set to false.
|
33
|
+
def offset_metadata(cache: true)
|
34
|
+
return false if revoked?
|
35
|
+
|
36
|
+
Fetcher.find(topic, partition, cache: cache)
|
37
|
+
end
|
38
|
+
|
39
|
+
alias committed_offset_metadata offset_metadata
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,131 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
7
|
+
# repository and their usage requires commercial license agreement.
|
8
|
+
#
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
10
|
+
#
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
12
|
+
# your code to Maciej Mensfeld.
|
13
|
+
|
14
|
+
module Karafka
|
15
|
+
module Pro
|
16
|
+
module Processing
|
17
|
+
# Offset Metadata support on the processing side
|
18
|
+
module OffsetMetadata
|
19
|
+
# This fetcher is responsible for fetching and caching committed offsets metadata
|
20
|
+
# information.
|
21
|
+
#
|
22
|
+
# By design we fetch all information for a requested topic assignments. Not all topics from
|
23
|
+
# the same subscription group may need metadata and even if they do, we can run a few smaller
|
24
|
+
# queries. This approach prevents us from querying all assigned topics data in one go
|
25
|
+
# preventing excessive queries.
|
26
|
+
#
|
27
|
+
# Since the assumption is, that user will not have to reach out for the later metadata
|
28
|
+
# since it is produced in the context of a given consumer assignment, we can cache the
|
29
|
+
# initial result and only allow users for explicit invalidation.
|
30
|
+
class Fetcher
|
31
|
+
include Singleton
|
32
|
+
|
33
|
+
class << self
|
34
|
+
extend Forwardable
|
35
|
+
|
36
|
+
def_delegators :instance, :register, :clear, :find
|
37
|
+
end
|
38
|
+
|
39
|
+
def initialize
|
40
|
+
@mutexes = {}
|
41
|
+
@clients = {}
|
42
|
+
@tpls = {}
|
43
|
+
end
|
44
|
+
|
45
|
+
# Registers a client of a given subscription group, so we can use it for queries later on
|
46
|
+
# @param client [Karafka::Connection::Client]
|
47
|
+
# @note Since we store the client reference and not the underlying rdkafka consumer
|
48
|
+
# instance, we do not have to deal with the recovery as it is abstracted away
|
49
|
+
def register(client)
|
50
|
+
@clients[client.subscription_group] = client
|
51
|
+
# We use one mutex per SG because independent SGs can query in parallel
|
52
|
+
@mutexes[client.subscription_group] = Mutex.new
|
53
|
+
@tpls[client.subscription_group] = {}
|
54
|
+
end
|
55
|
+
|
56
|
+
# Queries or retrieves from cache the given offset metadata for the selected partition
|
57
|
+
#
|
58
|
+
# @param topic [Karafka::Routing::Topic] routing topic with subscription group reference
|
59
|
+
# @param partition [Integer] partition for which we want to get stored offset metadata
|
60
|
+
# @param cache [Boolean] forces explicit query to Kafka when false and cache refresh.
|
61
|
+
# By default we use the setting from the topic level but this can be overwritten on
|
62
|
+
# a per request basis if needed.
|
63
|
+
# @return [Object, false] deserialized metadata (string deserializer by default) or
|
64
|
+
# false in case we were not able to obtain the details because we have lost the
|
65
|
+
# assignment
|
66
|
+
def find(topic, partition, cache: true)
|
67
|
+
cache = topic.offset_metadata.cache? && cache
|
68
|
+
|
69
|
+
tpls = fetch(topic, cache)
|
70
|
+
|
71
|
+
return false unless tpls
|
72
|
+
|
73
|
+
t_partitions = tpls.fetch(topic.name, [])
|
74
|
+
t_partition = t_partitions.find { |t_p| t_p.partition == partition }
|
75
|
+
|
76
|
+
# If we do not have given topic partition here, it means it is no longer part of our
|
77
|
+
# assignment and we should return false
|
78
|
+
return false unless t_partition
|
79
|
+
|
80
|
+
topic.offset_metadata.deserializer.call(t_partition.metadata)
|
81
|
+
end
|
82
|
+
|
83
|
+
# Clears cache of a given subscription group. It is triggered on assignment changes.
|
84
|
+
#
|
85
|
+
# @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group that
|
86
|
+
# we want to clear.
|
87
|
+
def clear(subscription_group)
|
88
|
+
@mutexes.fetch(subscription_group).synchronize do
|
89
|
+
@tpls[subscription_group].clear
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
private
|
94
|
+
|
95
|
+
# Fetches from Kafka all committed offsets for the given topic partitions that are
|
96
|
+
# assigned to this process.
|
97
|
+
#
|
98
|
+
# We fetch all because in majority of the cases, the behavior of the end user code is
|
99
|
+
# not specific to a given partition but the same for all. In such cases we save on
|
100
|
+
# querying as we get all data for all partitions in one go.
|
101
|
+
#
|
102
|
+
# @param topic [Karafka::Routing::Topic] topic for which we want to fetch tpls data
|
103
|
+
# @param cache [Boolean] should we return cached data if present
|
104
|
+
def fetch(topic, cache)
|
105
|
+
subscription_group = topic.subscription_group
|
106
|
+
t_tpls = @tpls.fetch(subscription_group, false)
|
107
|
+
t_tpl = t_tpls[topic]
|
108
|
+
|
109
|
+
return t_tpl if t_tpl && cache
|
110
|
+
|
111
|
+
assigned_tpls = @clients.fetch(subscription_group).assignment
|
112
|
+
t_tpl = assigned_tpls.to_h.fetch(topic.name, false)
|
113
|
+
|
114
|
+
# May be false in case we lost given assignment but still run LRJ
|
115
|
+
return false unless t_tpl
|
116
|
+
return false if t_tpl.empty?
|
117
|
+
|
118
|
+
@mutexes.fetch(subscription_group).synchronize do
|
119
|
+
rd_tpl = Rdkafka::Consumer::TopicPartitionList.new(topic.name => t_tpl)
|
120
|
+
|
121
|
+
# While in theory we could lose assignment while being here, this will work and will
|
122
|
+
# return us proper tpl, we do not deal with this case on this layer and report anyhow
|
123
|
+
# There will not be any exception and this will operate correctly
|
124
|
+
t_tpls[topic] = @clients.fetch(subscription_group).committed(rd_tpl).to_h
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
130
|
+
end
|
131
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
7
|
+
# repository and their usage requires commercial license agreement.
|
8
|
+
#
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
10
|
+
#
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
12
|
+
# your code to Maciej Mensfeld.
|
13
|
+
|
14
|
+
module Karafka
|
15
|
+
module Pro
|
16
|
+
module Processing
|
17
|
+
module OffsetMetadata
|
18
|
+
# Keeps track of rebalances and updates the fetcher
|
19
|
+
# Since we cache the tpls with metadata, we need to invalidate them on events that would
|
20
|
+
# cause changes in the assignments
|
21
|
+
class Listener
|
22
|
+
# When we start listening we need to register this client in the metadata fetcher, so
|
23
|
+
# we have the client related to a given subscription group that we can use in fetcher
|
24
|
+
# since fetcher may be used in filtering API and other places outside of the standard
|
25
|
+
# consumer flow
|
26
|
+
# @param event [Karafka::Core::Monitoring::Event]
|
27
|
+
def on_connection_listener_before_fetch_loop(event)
|
28
|
+
Fetcher.register event[:client]
|
29
|
+
end
|
30
|
+
|
31
|
+
# Invalidates internal cache when assignments change so we can get correct metadata
|
32
|
+
# @param event [Karafka::Core::Monitoring::Event]
|
33
|
+
def on_rebalance_partitions_assigned(event)
|
34
|
+
Fetcher.clear event[:subscription_group]
|
35
|
+
end
|
36
|
+
|
37
|
+
# Invalidates internal cache when assignments change so we can get correct metadata
|
38
|
+
# @param event [Karafka::Core::Monitoring::Event]
|
39
|
+
def on_rebalance_partitions_revoked(event)
|
40
|
+
Fetcher.clear event[:subscription_group]
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -31,9 +31,19 @@ module Karafka
|
|
31
31
|
@mutex = Mutex.new
|
32
32
|
end
|
33
33
|
|
34
|
+
# Schedules any jobs provided in a fifo order
|
35
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>]
|
36
|
+
def schedule_fifo(jobs_array)
|
37
|
+
jobs_array.each do |job|
|
38
|
+
@queue << job
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
34
42
|
# Runs the consumption jobs scheduling flow under a mutex
|
35
43
|
#
|
36
|
-
# @param jobs_array
|
44
|
+
# @param jobs_array
|
45
|
+
# [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
|
46
|
+
# jobs for scheduling
|
37
47
|
def on_schedule_consumption(jobs_array)
|
38
48
|
@mutex.synchronize do
|
39
49
|
schedule_consumption(jobs_array)
|
@@ -42,53 +52,59 @@ module Karafka
|
|
42
52
|
|
43
53
|
# Should schedule the consumption jobs
|
44
54
|
#
|
45
|
-
# @param _jobs_array
|
55
|
+
# @param _jobs_array
|
56
|
+
# [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
|
57
|
+
# jobs for scheduling
|
46
58
|
def schedule_consumption(_jobs_array)
|
47
59
|
raise NotImplementedError, 'Implement in a subclass'
|
48
60
|
end
|
49
61
|
|
50
62
|
# Runs the revocation jobs scheduling flow under a mutex
|
51
63
|
#
|
52
|
-
# @param jobs_array
|
64
|
+
# @param jobs_array
|
65
|
+
# [Array<Karafka::Processing::Jobs::Revoked, Processing::Jobs::RevokedNonBlocking>]
|
66
|
+
# jobs for scheduling
|
53
67
|
def on_schedule_revocation(jobs_array)
|
54
68
|
@mutex.synchronize do
|
55
69
|
schedule_revocation(jobs_array)
|
56
70
|
end
|
57
71
|
end
|
58
72
|
|
59
|
-
# Schedules the revocation jobs.
|
60
|
-
#
|
61
|
-
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
|
62
|
-
#
|
63
|
-
# @note We provide a default scheduler logic here because by default revocation jobs
|
64
|
-
# should be scheduled as fast as possible.
|
65
|
-
def schedule_revocation(jobs_array)
|
66
|
-
jobs_array.each do |job|
|
67
|
-
@queue << job
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
73
|
# Runs the shutdown jobs scheduling flow under a mutex
|
72
74
|
#
|
73
|
-
# @param jobs_array [Array<Karafka::Processing::Jobs::
|
75
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Shutdown>] jobs for scheduling
|
74
76
|
def on_schedule_shutdown(jobs_array)
|
75
77
|
@mutex.synchronize do
|
76
78
|
schedule_shutdown(jobs_array)
|
77
79
|
end
|
78
80
|
end
|
79
81
|
|
80
|
-
#
|
82
|
+
# Runs the idle jobs scheduling flow under a mutex
|
81
83
|
#
|
82
|
-
# @param jobs_array [Array<Karafka::Processing::Jobs::
|
84
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Idle>] jobs for scheduling
|
85
|
+
def on_schedule_idle(jobs_array)
|
86
|
+
@mutex.synchronize do
|
87
|
+
schedule_idle(jobs_array)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
# Runs the periodic jobs scheduling flow under a mutex
|
83
92
|
#
|
84
|
-
# @
|
85
|
-
#
|
86
|
-
|
87
|
-
|
88
|
-
|
93
|
+
# @param jobs_array
|
94
|
+
# [Array<Processing::Jobs::Periodic, Processing::Jobs::PeriodicNonBlocking>]
|
95
|
+
# jobs for scheduling
|
96
|
+
def on_schedule_periodic(jobs_array)
|
97
|
+
@mutex.synchronize do
|
98
|
+
schedule_periodic(jobs_array)
|
89
99
|
end
|
90
100
|
end
|
91
101
|
|
102
|
+
# Schedule by default all except consumption as fifo
|
103
|
+
alias schedule_revocation schedule_fifo
|
104
|
+
alias schedule_shutdown schedule_fifo
|
105
|
+
alias schedule_idle schedule_fifo
|
106
|
+
alias schedule_periodic schedule_fifo
|
107
|
+
|
92
108
|
# Runs the manage tick under mutex
|
93
109
|
def on_manage
|
94
110
|
@mutex.synchronize { manage }
|
@@ -31,7 +31,9 @@ module Karafka
|
|
31
31
|
class Default < Base
|
32
32
|
# Schedules jobs in the LJF order for consumption
|
33
33
|
#
|
34
|
-
# @param jobs_array
|
34
|
+
# @param jobs_array
|
35
|
+
# [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
|
36
|
+
# jobs for scheduling
|
35
37
|
def on_schedule_consumption(jobs_array)
|
36
38
|
perf_tracker = Instrumentation::PerformanceTracker.instance
|
37
39
|
|
@@ -53,23 +55,19 @@ module Karafka
|
|
53
55
|
end
|
54
56
|
end
|
55
57
|
|
56
|
-
# Schedules jobs in
|
57
|
-
#
|
58
|
-
|
59
|
-
def on_schedule_revocation(jobs_array)
|
58
|
+
# Schedules any jobs provided in a fifo order
|
59
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>]
|
60
|
+
def schedule_fifo(jobs_array)
|
60
61
|
jobs_array.each do |job|
|
61
62
|
@queue << job
|
62
63
|
end
|
63
64
|
end
|
64
65
|
|
65
|
-
#
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
@queue << job
|
71
|
-
end
|
72
|
-
end
|
66
|
+
# By default all non-consumption work is scheduled in a fifo order
|
67
|
+
alias on_schedule_revocation schedule_fifo
|
68
|
+
alias on_schedule_shutdown schedule_fifo
|
69
|
+
alias on_schedule_idle schedule_fifo
|
70
|
+
alias on_schedule_periodic schedule_fifo
|
73
71
|
|
74
72
|
# This scheduler does not have anything to manage as it is a pass through and has no
|
75
73
|
# state
|
@@ -87,7 +85,7 @@ module Karafka
|
|
87
85
|
private
|
88
86
|
|
89
87
|
# @param perf_tracker [PerformanceTracker]
|
90
|
-
# @param job [Karafka::Processing::Jobs::
|
88
|
+
# @param job [Karafka::Processing::Jobs::Consume] job we will be processing
|
91
89
|
# @return [Numeric] estimated cost of processing this job
|
92
90
|
def processing_cost(perf_tracker, job)
|
93
91
|
if job.is_a?(::Karafka::Processing::Jobs::Consume)
|