karafka 2.2.13 → 2.3.0.alpha1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +38 -12
- data/.ruby-version +1 -1
- data/CHANGELOG.md +161 -125
- data/Gemfile.lock +12 -12
- data/README.md +0 -2
- data/SECURITY.md +23 -0
- data/config/locales/errors.yml +7 -1
- data/config/locales/pro_errors.yml +22 -0
- data/docker-compose.yml +3 -1
- data/karafka.gemspec +2 -2
- data/lib/karafka/admin/acl.rb +287 -0
- data/lib/karafka/admin.rb +118 -16
- data/lib/karafka/app.rb +12 -3
- data/lib/karafka/base_consumer.rb +32 -31
- data/lib/karafka/cli/base.rb +1 -1
- data/lib/karafka/connection/client.rb +94 -84
- data/lib/karafka/connection/conductor.rb +28 -0
- data/lib/karafka/connection/listener.rb +165 -46
- data/lib/karafka/connection/listeners_batch.rb +5 -11
- data/lib/karafka/connection/manager.rb +72 -0
- data/lib/karafka/connection/messages_buffer.rb +12 -0
- data/lib/karafka/connection/proxy.rb +17 -0
- data/lib/karafka/connection/status.rb +75 -0
- data/lib/karafka/contracts/config.rb +14 -10
- data/lib/karafka/contracts/consumer_group.rb +9 -1
- data/lib/karafka/contracts/topic.rb +3 -1
- data/lib/karafka/errors.rb +13 -0
- data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
- data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
- data/lib/karafka/instrumentation/logger_listener.rb +3 -9
- data/lib/karafka/instrumentation/notifications.rb +19 -9
- data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +31 -28
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +22 -3
- data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +15 -12
- data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +39 -36
- data/lib/karafka/pro/base_consumer.rb +47 -0
- data/lib/karafka/pro/connection/manager.rb +300 -0
- data/lib/karafka/pro/connection/multiplexing/listener.rb +40 -0
- data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
- data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
- data/lib/karafka/pro/iterator.rb +1 -6
- data/lib/karafka/pro/loader.rb +16 -2
- data/lib/karafka/pro/processing/coordinator.rb +2 -1
- data/lib/karafka/pro/processing/executor.rb +37 -0
- data/lib/karafka/pro/processing/expansions_selector.rb +32 -0
- data/lib/karafka/pro/processing/jobs/periodic.rb +41 -0
- data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +32 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +14 -3
- data/lib/karafka/pro/processing/offset_metadata/consumer.rb +44 -0
- data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +131 -0
- data/lib/karafka/pro/processing/offset_metadata/listener.rb +46 -0
- data/lib/karafka/pro/processing/schedulers/base.rb +143 -0
- data/lib/karafka/pro/processing/schedulers/default.rb +107 -0
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/default.rb +136 -3
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +35 -0
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/vp/default.rb +60 -26
- data/lib/karafka/pro/processing/virtual_offset_manager.rb +41 -11
- data/lib/karafka/pro/routing/features/long_running_job/topic.rb +2 -0
- data/lib/karafka/pro/routing/features/multiplexing/config.rb +38 -0
- data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +114 -0
- data/lib/karafka/pro/routing/features/multiplexing/patches/contracts/consumer_group.rb +42 -0
- data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +38 -0
- data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +42 -0
- data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +40 -0
- data/lib/karafka/pro/routing/features/multiplexing.rb +59 -0
- data/lib/karafka/pro/routing/features/non_blocking_job/topic.rb +32 -0
- data/lib/karafka/pro/routing/features/non_blocking_job.rb +37 -0
- data/lib/karafka/pro/routing/features/offset_metadata/config.rb +33 -0
- data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +42 -0
- data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +65 -0
- data/lib/karafka/pro/routing/features/offset_metadata.rb +40 -0
- data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +4 -0
- data/lib/karafka/pro/routing/features/patterns/detector.rb +18 -10
- data/lib/karafka/pro/routing/features/periodic_job/config.rb +37 -0
- data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +44 -0
- data/lib/karafka/pro/routing/features/periodic_job/topic.rb +94 -0
- data/lib/karafka/pro/routing/features/periodic_job.rb +27 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +7 -2
- data/lib/karafka/process.rb +5 -3
- data/lib/karafka/processing/coordinator.rb +5 -1
- data/lib/karafka/processing/executor.rb +43 -13
- data/lib/karafka/processing/executors_buffer.rb +22 -7
- data/lib/karafka/processing/jobs/base.rb +19 -2
- data/lib/karafka/processing/jobs/consume.rb +3 -3
- data/lib/karafka/processing/jobs/idle.rb +5 -0
- data/lib/karafka/processing/jobs/revoked.rb +5 -0
- data/lib/karafka/processing/jobs/shutdown.rb +5 -0
- data/lib/karafka/processing/jobs_queue.rb +19 -8
- data/lib/karafka/processing/schedulers/default.rb +42 -0
- data/lib/karafka/processing/strategies/base.rb +13 -4
- data/lib/karafka/processing/strategies/default.rb +23 -7
- data/lib/karafka/processing/strategies/dlq.rb +36 -0
- data/lib/karafka/processing/worker.rb +4 -1
- data/lib/karafka/routing/builder.rb +12 -2
- data/lib/karafka/routing/consumer_group.rb +5 -5
- data/lib/karafka/routing/features/base.rb +44 -8
- data/lib/karafka/routing/features/dead_letter_queue/config.rb +6 -1
- data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
- data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -2
- data/lib/karafka/routing/proxy.rb +4 -3
- data/lib/karafka/routing/subscription_group.rb +2 -2
- data/lib/karafka/routing/subscription_groups_builder.rb +11 -2
- data/lib/karafka/routing/topic.rb +8 -10
- data/lib/karafka/routing/topics.rb +1 -1
- data/lib/karafka/runner.rb +13 -3
- data/lib/karafka/server.rb +5 -9
- data/lib/karafka/setup/config.rb +21 -1
- data/lib/karafka/status.rb +23 -14
- data/lib/karafka/templates/karafka.rb.erb +7 -0
- data/lib/karafka/time_trackers/partition_usage.rb +56 -0
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +47 -13
- metadata.gz.sig +0 -0
- data/lib/karafka/connection/consumer_group_coordinator.rb +0 -48
- data/lib/karafka/pro/performance_tracker.rb +0 -84
- data/lib/karafka/pro/processing/scheduler.rb +0 -74
- data/lib/karafka/processing/scheduler.rb +0 -38
@@ -16,12 +16,12 @@ module Karafka
|
|
16
16
|
module Processing
|
17
17
|
# Pro jobs builder that supports lrj
|
18
18
|
class JobsBuilder < ::Karafka::Processing::JobsBuilder
|
19
|
-
# @param executor [Karafka::Processing::Executor]
|
19
|
+
# @param executor [Karafka::Pro::Processing::Executor]
|
20
20
|
def idle(executor)
|
21
21
|
Karafka::Processing::Jobs::Idle.new(executor)
|
22
22
|
end
|
23
23
|
|
24
|
-
# @param executor [Karafka::Processing::Executor]
|
24
|
+
# @param executor [Karafka::Pro::Processing::Executor]
|
25
25
|
# @param messages [Karafka::Messages::Messages] messages batch to be consumed
|
26
26
|
# @return [Karafka::Processing::Jobs::Consume] blocking job
|
27
27
|
# @return [Karafka::Pro::Processing::Jobs::ConsumeNonBlocking] non blocking for lrj
|
@@ -33,7 +33,7 @@ module Karafka
|
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
36
|
-
# @param executor [Karafka::Processing::Executor]
|
36
|
+
# @param executor [Karafka::Pro::Processing::Executor]
|
37
37
|
# @return [Karafka::Processing::Jobs::Revoked] revocation job for non LRJ
|
38
38
|
# @return [Karafka::Processing::Jobs::RevokedNonBlocking] revocation job that is
|
39
39
|
# non-blocking, so when revocation job is scheduled for LRJ it also will not block
|
@@ -44,6 +44,17 @@ module Karafka
|
|
44
44
|
super
|
45
45
|
end
|
46
46
|
end
|
47
|
+
|
48
|
+
# @param executor [Karafka::Pro::Processing::Executor]
|
49
|
+
# @return [Jobs::Periodic] Periodic job
|
50
|
+
# @return [Jobs::PeriodicNonBlocking] Periodic non-blocking job
|
51
|
+
def periodic(executor)
|
52
|
+
if executor.topic.long_running_job?
|
53
|
+
Jobs::PeriodicNonBlocking.new(executor)
|
54
|
+
else
|
55
|
+
Jobs::Periodic.new(executor)
|
56
|
+
end
|
57
|
+
end
|
47
58
|
end
|
48
59
|
end
|
49
60
|
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
7
|
+
# repository and their usage requires commercial license agreement.
|
8
|
+
#
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
10
|
+
#
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
12
|
+
# your code to Maciej Mensfeld.
|
13
|
+
|
14
|
+
module Karafka
|
15
|
+
module Pro
|
16
|
+
module Processing
|
17
|
+
# Offset Metadata support on the processing side
|
18
|
+
module OffsetMetadata
|
19
|
+
# Extra API methods for offset metadata fetching
|
20
|
+
# @note Part of this feature API is embedded directly into the strategies because it alters
|
21
|
+
# how marking methods (`#mark_as_consumed` and `#mark_as_consumed!`) operate. Because
|
22
|
+
# of that, they had to be embedded into the strategies.
|
23
|
+
module Consumer
|
24
|
+
# @param cache [Boolean] should we use cached result if present (true by default)
|
25
|
+
# @return [false, Object] false in case we do not own the partition anymore or
|
26
|
+
# deserialized metadata based on the deserializer
|
27
|
+
# @note Caching is on as the assumption here is, that most of the time user will be
|
28
|
+
# interested only in the offset metadata that "came" from the time prior to the
|
29
|
+
# rebalance. That is because the rest of the metadata (current) is created and
|
30
|
+
# controlled by the user himself, thus there is no need to retrieve it. In case this
|
31
|
+
# is not true and user wants to always get the Kafka metadata, `cache` value of this
|
32
|
+
# feature can be set to false.
|
33
|
+
def offset_metadata(cache: true)
|
34
|
+
return false if revoked?
|
35
|
+
|
36
|
+
Fetcher.find(topic, partition, cache: cache)
|
37
|
+
end
|
38
|
+
|
39
|
+
alias committed_offset_metadata offset_metadata
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,131 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
7
|
+
# repository and their usage requires commercial license agreement.
|
8
|
+
#
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
10
|
+
#
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
12
|
+
# your code to Maciej Mensfeld.
|
13
|
+
|
14
|
+
module Karafka
|
15
|
+
module Pro
|
16
|
+
module Processing
|
17
|
+
# Offset Metadata support on the processing side
|
18
|
+
module OffsetMetadata
|
19
|
+
# This fetcher is responsible for fetching and caching committed offsets metadata
|
20
|
+
# information.
|
21
|
+
#
|
22
|
+
# By design we fetch all information for a requested topic's assignments. Not all topics from
|
23
|
+
# the same subscription group may need metadata and even if they do, we can run a few smaller
|
24
|
+
# queries. This approach prevents us from querying all assigned topics data in one go
|
25
|
+
# preventing excessive queries.
|
26
|
+
#
|
27
|
+
# Since the assumption is, that user will not have to reach out for the later metadata
|
28
|
+
# since it is produced in the context of a given consumer assignment, we can cache the
|
29
|
+
# initial result and only allow users for explicit invalidation.
|
30
|
+
class Fetcher
|
31
|
+
include Singleton
|
32
|
+
|
33
|
+
class << self
|
34
|
+
extend Forwardable
|
35
|
+
|
36
|
+
def_delegators :instance, :register, :clear, :find
|
37
|
+
end
|
38
|
+
|
39
|
+
def initialize
|
40
|
+
@mutexes = {}
|
41
|
+
@clients = {}
|
42
|
+
@tpls = {}
|
43
|
+
end
|
44
|
+
|
45
|
+
# Registers a client of a given subscription group, so we can use it for queries later on
|
46
|
+
# @param client [Karafka::Connection::Client]
|
47
|
+
# @note Since we store the client reference and not the underlying rdkafka consumer
|
48
|
+
# instance, we do not have to deal with the recovery as it is abstracted away
|
49
|
+
def register(client)
|
50
|
+
@clients[client.subscription_group] = client
|
51
|
+
# We use one mutex per SG because independent SGs can query in parallel
|
52
|
+
@mutexes[client.subscription_group] = Mutex.new
|
53
|
+
@tpls[client.subscription_group] = {}
|
54
|
+
end
|
55
|
+
|
56
|
+
# Queries or retrieves from cache the given offset metadata for the selected partition
|
57
|
+
#
|
58
|
+
# @param topic [Karafka::Routing::Topic] routing topic with subscription group reference
|
59
|
+
# @param partition [Integer] partition for which we want to get stored offset metadata
|
60
|
+
# @param cache [Boolean] forces explicit query to Kafka when false and cache refresh.
|
61
|
+
# By default we use the setting from the topic level but this can be overwritten on
|
62
|
+
# a per request basis if needed.
|
63
|
+
# @return [Object, false] deserialized metadata (string deserializer by default) or
|
64
|
+
# false in case we were not able to obtain the details because we have lost the
|
65
|
+
# assignment
|
66
|
+
def find(topic, partition, cache: true)
|
67
|
+
cache = topic.offset_metadata.cache? && cache
|
68
|
+
|
69
|
+
tpls = fetch(topic, cache)
|
70
|
+
|
71
|
+
return false unless tpls
|
72
|
+
|
73
|
+
t_partitions = tpls.fetch(topic.name, [])
|
74
|
+
t_partition = t_partitions.find { |t_p| t_p.partition == partition }
|
75
|
+
|
76
|
+
# If we do not have given topic partition here, it means it is no longer part of our
|
77
|
+
# assignment and we should return false
|
78
|
+
return false unless t_partition
|
79
|
+
|
80
|
+
topic.offset_metadata.deserializer.call(t_partition.metadata)
|
81
|
+
end
|
82
|
+
|
83
|
+
# Clears cache of a given subscription group. It is triggered on assignment changes.
|
84
|
+
#
|
85
|
+
# @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group that
|
86
|
+
# we want to clear.
|
87
|
+
def clear(subscription_group)
|
88
|
+
@mutexes.fetch(subscription_group).synchronize do
|
89
|
+
@tpls[subscription_group].clear
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
private
|
94
|
+
|
95
|
+
# Fetches from Kafka all committed offsets for the given topic partitions that are
|
96
|
+
# assigned to this process.
|
97
|
+
#
|
98
|
+
# We fetch all because in majority of the cases, the behavior of the end user code is
|
99
|
+
# not specific to a given partition but the same for all. In such cases we save on
|
100
|
+
# querying as we get all data for all partitions in one go.
|
101
|
+
#
|
102
|
+
# @param topic [Karafka::Routing::Topic] topic for which we want to fetch tpls data
|
103
|
+
# @param cache [Boolean] should we return cached data if present
|
104
|
+
def fetch(topic, cache)
|
105
|
+
subscription_group = topic.subscription_group
|
106
|
+
t_tpls = @tpls.fetch(subscription_group, false)
|
107
|
+
t_tpl = t_tpls[topic]
|
108
|
+
|
109
|
+
return t_tpl if t_tpl && cache
|
110
|
+
|
111
|
+
assigned_tpls = @clients.fetch(subscription_group).assignment
|
112
|
+
t_tpl = assigned_tpls.to_h.fetch(topic.name, false)
|
113
|
+
|
114
|
+
# May be false in case we lost given assignment but still run LRJ
|
115
|
+
return false unless t_tpl
|
116
|
+
return false if t_tpl.empty?
|
117
|
+
|
118
|
+
@mutexes.fetch(subscription_group).synchronize do
|
119
|
+
rd_tpl = Rdkafka::Consumer::TopicPartitionList.new(topic.name => t_tpl)
|
120
|
+
|
121
|
+
# While in theory we could lose the assignment while being here, this will work and will
|
122
|
+
# return us proper tpl, we do not deal with this case on this layer and report anyhow
|
123
|
+
# There will not be any exception and this will operate correctly
|
124
|
+
t_tpls[topic] = @clients.fetch(subscription_group).committed(rd_tpl).to_h
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
130
|
+
end
|
131
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
7
|
+
# repository and their usage requires commercial license agreement.
|
8
|
+
#
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
10
|
+
#
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
12
|
+
# your code to Maciej Mensfeld.
|
13
|
+
|
14
|
+
module Karafka
|
15
|
+
module Pro
|
16
|
+
module Processing
|
17
|
+
module OffsetMetadata
|
18
|
+
# Keeps track of rebalances and updates the fetcher
|
19
|
+
# Since we cache the tpls with metadata, we need to invalidate them on events that would
|
20
|
+
# cause changes in the assignments
|
21
|
+
class Listener
|
22
|
+
# When we start listening we need to register this client in the metadata fetcher, so
|
23
|
+
# we have the client related to a given subscription group that we can use in fetcher
|
24
|
+
# since fetcher may be used in filtering API and other places outside of the standard
|
25
|
+
# consumer flow
|
26
|
+
# @param event [Karafka::Core::Monitoring::Event]
|
27
|
+
def on_connection_listener_before_fetch_loop(event)
|
28
|
+
Fetcher.register event[:client]
|
29
|
+
end
|
30
|
+
|
31
|
+
# Invalidates internal cache when assignments change so we can get correct metadata
|
32
|
+
# @param event [Karafka::Core::Monitoring::Event]
|
33
|
+
def on_rebalance_partitions_assigned(event)
|
34
|
+
Fetcher.clear event[:subscription_group]
|
35
|
+
end
|
36
|
+
|
37
|
+
# Invalidates internal cache when assignments change so we can get correct metadata
|
38
|
+
# @param event [Karafka::Core::Monitoring::Event]
|
39
|
+
def on_rebalance_partitions_revoked(event)
|
40
|
+
Fetcher.clear event[:subscription_group]
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
7
|
+
# repository and their usage requires commercial license agreement.
|
8
|
+
#
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
10
|
+
#
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
12
|
+
# your code to Maciej Mensfeld.
|
13
|
+
|
14
|
+
module Karafka
|
15
|
+
module Pro
|
16
|
+
module Processing
|
17
|
+
# Namespace for Pro schedulers related components
|
18
|
+
module Schedulers
|
19
|
+
# Base for all the Pro custom schedulers
|
20
|
+
#
|
21
|
+
# It wraps the Scheduler API with mutex to ensure, that during scheduling we do not start
|
22
|
+
# scheduling other work that could impact the decision making in between multiple
|
23
|
+
# subscription groups running in separate threads.
|
24
|
+
#
|
25
|
+
# @note All the `on_` methods can be redefined with non-thread-safe versions without
|
26
|
+
# locks if needed, however when doing so, ensure that your scheduler is stateless.
|
27
|
+
class Base
|
28
|
+
# @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
|
29
|
+
def initialize(queue)
|
30
|
+
@queue = queue
|
31
|
+
@mutex = Mutex.new
|
32
|
+
end
|
33
|
+
|
34
|
+
# Schedules any jobs provided in a fifo order
|
35
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>]
|
36
|
+
def schedule_fifo(jobs_array)
|
37
|
+
jobs_array.each do |job|
|
38
|
+
@queue << job
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
# Runs the consumption jobs scheduling flow under a mutex
|
43
|
+
#
|
44
|
+
# @param jobs_array
|
45
|
+
# [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
|
46
|
+
# jobs for scheduling
|
47
|
+
def on_schedule_consumption(jobs_array)
|
48
|
+
@mutex.synchronize do
|
49
|
+
schedule_consumption(jobs_array)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
# Should schedule the consumption jobs
|
54
|
+
#
|
55
|
+
# @param _jobs_array
|
56
|
+
# [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
|
57
|
+
# jobs for scheduling
|
58
|
+
def schedule_consumption(_jobs_array)
|
59
|
+
raise NotImplementedError, 'Implement in a subclass'
|
60
|
+
end
|
61
|
+
|
62
|
+
# Runs the revocation jobs scheduling flow under a mutex
|
63
|
+
#
|
64
|
+
# @param jobs_array
|
65
|
+
# [Array<Karafka::Processing::Jobs::Revoked, Processing::Jobs::RevokedNonBlocking>]
|
66
|
+
# jobs for scheduling
|
67
|
+
def on_schedule_revocation(jobs_array)
|
68
|
+
@mutex.synchronize do
|
69
|
+
schedule_revocation(jobs_array)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
# Runs the shutdown jobs scheduling flow under a mutex
|
74
|
+
#
|
75
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Shutdown>] jobs for scheduling
|
76
|
+
def on_schedule_shutdown(jobs_array)
|
77
|
+
@mutex.synchronize do
|
78
|
+
schedule_shutdown(jobs_array)
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
# Runs the idle jobs scheduling flow under a mutex
|
83
|
+
#
|
84
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Idle>] jobs for scheduling
|
85
|
+
def on_schedule_idle(jobs_array)
|
86
|
+
@mutex.synchronize do
|
87
|
+
schedule_idle(jobs_array)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
# Runs the periodic jobs scheduling flow under a mutex
|
92
|
+
#
|
93
|
+
# @param jobs_array
|
94
|
+
# [Array<Processing::Jobs::Periodic, Processing::Jobs::PeriodicNonBlocking>]
|
95
|
+
# jobs for scheduling
|
96
|
+
def on_schedule_periodic(jobs_array)
|
97
|
+
@mutex.synchronize do
|
98
|
+
schedule_periodic(jobs_array)
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
# Schedule by default all except consumption as fifo
|
103
|
+
alias schedule_revocation schedule_fifo
|
104
|
+
alias schedule_shutdown schedule_fifo
|
105
|
+
alias schedule_idle schedule_fifo
|
106
|
+
alias schedule_periodic schedule_fifo
|
107
|
+
|
108
|
+
# Runs the manage tick under mutex
|
109
|
+
def on_manage
|
110
|
+
@mutex.synchronize { manage }
|
111
|
+
end
|
112
|
+
|
113
|
+
# Should manage scheduling on jobs state changes
|
114
|
+
#
|
115
|
+
# By default does nothing as default schedulers are stateless
|
116
|
+
def manage
|
117
|
+
nil
|
118
|
+
end
|
119
|
+
|
120
|
+
# Runs clearing under mutex
|
121
|
+
#
|
122
|
+
# @param group_id [String] Subscription group id
|
123
|
+
def on_clear(group_id)
|
124
|
+
@mutex.synchronize { clear(group_id) }
|
125
|
+
end
|
126
|
+
|
127
|
+
# By default schedulers are stateless, so nothing to clear.
|
128
|
+
#
|
129
|
+
# @param _group_id [String] Subscription group id
|
130
|
+
def clear(_group_id)
|
131
|
+
nil
|
132
|
+
end
|
133
|
+
|
134
|
+
private
|
135
|
+
|
136
|
+
# @return [Karafka::Processing::JobsQueue] jobs queue reference for internal usage
|
137
|
+
# inside of the scheduler
|
138
|
+
attr_reader :queue
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
142
|
+
end
|
143
|
+
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
7
|
+
# repository and their usage requires commercial license agreement.
|
8
|
+
#
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
10
|
+
#
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
12
|
+
# your code to Maciej Mensfeld.
|
13
|
+
|
14
|
+
module Karafka
|
15
|
+
module Pro
|
16
|
+
module Processing
|
17
|
+
# Namespace for Pro schedulers
|
18
|
+
module Schedulers
|
19
|
+
# Optimized scheduler that takes into consideration the execution time needed to process
|
20
|
+
# messages from given topics partitions. It uses the non-preemptive LJF algorithm
|
21
|
+
#
|
22
|
+
# This scheduler is designed to optimize execution times on jobs that perform IO operations
|
23
|
+
# as when taking IO into consideration, it can achieve optimized parallel processing.
|
24
|
+
#
|
25
|
+
# This scheduler can also work with virtual partitions.
|
26
|
+
#
|
27
|
+
# Aside from consumption jobs, other jobs do not run often, thus we can leave them with
|
28
|
+
# default FIFO scheduler from the default Karafka scheduler
|
29
|
+
#
|
30
|
+
# @note This is a stateless scheduler, thus we can override the `#on_` API.
|
31
|
+
class Default < Base
|
32
|
+
# Schedules jobs in the LJF order for consumption
|
33
|
+
#
|
34
|
+
# @param jobs_array
|
35
|
+
# [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
|
36
|
+
# jobs for scheduling
|
37
|
+
def on_schedule_consumption(jobs_array)
|
38
|
+
perf_tracker = Instrumentation::PerformanceTracker.instance
|
39
|
+
|
40
|
+
ordered = []
|
41
|
+
|
42
|
+
jobs_array.each do |job|
|
43
|
+
ordered << [
|
44
|
+
job,
|
45
|
+
processing_cost(perf_tracker, job)
|
46
|
+
]
|
47
|
+
end
|
48
|
+
|
49
|
+
ordered.sort_by!(&:last)
|
50
|
+
ordered.reverse!
|
51
|
+
ordered.map!(&:first)
|
52
|
+
|
53
|
+
ordered.each do |job|
|
54
|
+
@queue << job
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
# Schedules any jobs provided in a fifo order
|
59
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>]
|
60
|
+
def schedule_fifo(jobs_array)
|
61
|
+
jobs_array.each do |job|
|
62
|
+
@queue << job
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
# By default all non-consumption work is scheduled in a fifo order
|
67
|
+
alias on_schedule_revocation schedule_fifo
|
68
|
+
alias on_schedule_shutdown schedule_fifo
|
69
|
+
alias on_schedule_idle schedule_fifo
|
70
|
+
alias on_schedule_periodic schedule_fifo
|
71
|
+
|
72
|
+
# This scheduler does not have anything to manage as it is a pass through and has no
|
73
|
+
# state
|
74
|
+
def on_manage
|
75
|
+
nil
|
76
|
+
end
|
77
|
+
|
78
|
+
# This scheduler does not need to be cleared because it is stateless
|
79
|
+
#
|
80
|
+
# @param _group_id [String] Subscription group id
|
81
|
+
def on_clear(_group_id)
|
82
|
+
nil
|
83
|
+
end
|
84
|
+
|
85
|
+
private
|
86
|
+
|
87
|
+
# @param perf_tracker [PerformanceTracker]
|
88
|
+
# @param job [Karafka::Processing::Jobs::Consume] job we will be processing
|
89
|
+
# @return [Numeric] estimated cost of processing this job
|
90
|
+
def processing_cost(perf_tracker, job)
|
91
|
+
if job.is_a?(::Karafka::Processing::Jobs::Consume)
|
92
|
+
messages = job.messages
|
93
|
+
message = messages.first
|
94
|
+
|
95
|
+
perf_tracker.processing_time_p95(message.topic, message.partition) * messages.size
|
96
|
+
else
|
97
|
+
# LJF will set first the most expensive, but we want to run the zero cost jobs
|
98
|
+
# related to the lifecycle always first. That is why we "emulate" that they are
|
99
|
+
# the longest possible jobs that anyone can run
|
100
|
+
Float::INFINITY
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
@@ -27,9 +27,127 @@ module Karafka
|
|
27
27
|
# Apply strategy for a non-feature based flow
|
28
28
|
FEATURES = %i[].freeze
|
29
29
|
|
30
|
+
# Marks message as consumed in an async way.
|
31
|
+
#
|
32
|
+
# @param message [Messages::Message] last successfully processed message.
|
33
|
+
# @param offset_metadata [String, nil] offset metadata string or nil if nothing
|
34
|
+
# @return [Boolean] true if we were able to mark the offset, false otherwise.
|
35
|
+
# False indicates that we were not able and that we have lost the partition.
|
36
|
+
#
|
37
|
+
# @note We keep track of this offset in case we would mark as consumed and got error when
|
38
|
+
# processing another message. In case like this we do not pause on the message we've
|
39
|
+
# already processed but rather at the next one. This applies to both sync and async
|
40
|
+
# versions of this method.
|
41
|
+
def mark_as_consumed(message, offset_metadata = nil)
|
42
|
+
if @_in_transaction
|
43
|
+
mark_in_transaction(message, offset_metadata, true)
|
44
|
+
else
|
45
|
+
# seek offset can be nil only in case `#seek` was invoked with offset reset request
|
46
|
+
# In case like this we ignore marking
|
47
|
+
return true if coordinator.seek_offset.nil?
|
48
|
+
# Ignore earlier offsets than the one we already committed
|
49
|
+
return true if coordinator.seek_offset > message.offset
|
50
|
+
return false if revoked?
|
51
|
+
return revoked? unless client.mark_as_consumed(message, offset_metadata)
|
52
|
+
|
53
|
+
coordinator.seek_offset = message.offset + 1
|
54
|
+
end
|
55
|
+
|
56
|
+
true
|
57
|
+
end
|
58
|
+
|
59
|
+
# Marks message as consumed in a sync way.
|
60
|
+
#
|
61
|
+
# @param message [Messages::Message] last successfully processed message.
|
62
|
+
# @param offset_metadata [String, nil] offset metadata string or nil if nothing
|
63
|
+
# @return [Boolean] true if we were able to mark the offset, false otherwise.
|
64
|
+
# False indicates that we were not able and that we have lost the partition.
|
65
|
+
def mark_as_consumed!(message, offset_metadata = nil)
|
66
|
+
if @_in_transaction
|
67
|
+
mark_in_transaction(message, offset_metadata, false)
|
68
|
+
else
|
69
|
+
# seek offset can be nil only in case `#seek` was invoked with offset reset request
|
70
|
+
# In case like this we ignore marking
|
71
|
+
return true if coordinator.seek_offset.nil?
|
72
|
+
# Ignore earlier offsets than the one we already committed
|
73
|
+
return true if coordinator.seek_offset > message.offset
|
74
|
+
return false if revoked?
|
75
|
+
|
76
|
+
return revoked? unless client.mark_as_consumed!(message, offset_metadata)
|
77
|
+
|
78
|
+
coordinator.seek_offset = message.offset + 1
|
79
|
+
end
|
80
|
+
|
81
|
+
true
|
82
|
+
end
|
83
|
+
|
84
|
+
# Starts producer transaction, saves the transaction context for transactional marking
|
85
|
+
# and runs user code in this context
|
86
|
+
#
|
87
|
+
# Transactions on a consumer level differ from those initiated by the producer as they
|
88
|
+
# allow to mark offsets inside of the transaction. If the transaction is initialized
|
89
|
+
# only from the consumer, the offset will be stored in a regular fashion.
|
90
|
+
#
|
91
|
+
# @param block [Proc] code that we want to run in a transaction
|
92
|
+
def transaction(&block)
|
93
|
+
transaction_started = false
|
94
|
+
|
95
|
+
# Prevent from nested transactions. It would not make any sense
|
96
|
+
raise Errors::TransactionAlreadyInitializedError if @_in_transaction
|
97
|
+
|
98
|
+
transaction_started = true
|
99
|
+
@_transaction_marked = []
|
100
|
+
@_in_transaction = true
|
101
|
+
|
102
|
+
producer.transaction(&block)
|
103
|
+
|
104
|
+
@_in_transaction = false
|
105
|
+
|
106
|
+
# This offset is already stored in transaction but we set it here anyhow because we
|
107
|
+
# want to make sure our internal in-memory state is aligned with the transaction
|
108
|
+
#
|
109
|
+
# @note We never need to use the blocking `#mark_as_consumed!` here because the offset
|
110
|
+
# anyhow was already stored during the transaction
|
111
|
+
#
|
112
|
+
# @note In theory we could only keep reference to the most recent marking and reject
|
113
|
+
# others. We however do not do it for two reasons:
|
114
|
+
# - User may have non standard flow relying on some alternative order and we want to
|
115
|
+
# mimic this
|
116
|
+
# - Complex strategies like VPs can use this in VPs to mark in parallel without
|
117
|
+
# having to redefine the transactional flow completely
|
118
|
+
@_transaction_marked.each do |marking|
|
119
|
+
marking.pop ? mark_as_consumed(*marking) : mark_as_consumed!(*marking)
|
120
|
+
end
|
121
|
+
ensure
|
122
|
+
if transaction_started
|
123
|
+
@_transaction_marked.clear
|
124
|
+
@_in_transaction = false
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
# Stores the next offset for processing inside of the transaction and stores it in a
|
129
|
+
# local accumulator for post-transaction status update
|
130
|
+
#
|
131
|
+
# @param message [Messages::Message] message we want to commit inside of a transaction
|
132
|
+
# @param offset_metadata [String, nil] offset metadata or nil if none
|
133
|
+
# @param async [Boolean] should we mark in async or sync way (applicable only to post
|
134
|
+
# transaction state synchronization usage as within transaction it is always sync)
|
135
|
+
def mark_in_transaction(message, offset_metadata, async)
|
136
|
+
raise Errors::TransactionRequiredError unless @_in_transaction
|
137
|
+
|
138
|
+
producer.transaction_mark_as_consumed(
|
139
|
+
client,
|
140
|
+
message,
|
141
|
+
offset_metadata
|
142
|
+
)
|
143
|
+
|
144
|
+
@_transaction_marked ||= []
|
145
|
+
@_transaction_marked << [message, offset_metadata, async]
|
146
|
+
end
|
147
|
+
|
30
148
|
# No actions needed for the standard flow here
|
31
|
-
def
|
32
|
-
Karafka.monitor.instrument('consumer.
|
149
|
+
def handle_before_schedule_consume
|
150
|
+
Karafka.monitor.instrument('consumer.before_schedule_consume', caller: self)
|
33
151
|
|
34
152
|
nil
|
35
153
|
end
|
@@ -87,7 +205,7 @@ module Karafka
|
|
87
205
|
end
|
88
206
|
end
|
89
207
|
|
90
|
-
# Standard
|
208
|
+
# Standard flow for revocation
|
91
209
|
def handle_revoked
|
92
210
|
coordinator.on_revoked do
|
93
211
|
resume
|
@@ -100,6 +218,21 @@ module Karafka
|
|
100
218
|
revoked
|
101
219
|
end
|
102
220
|
end
|
221
|
+
|
222
|
+
# No action needed for the tick standard flow
|
223
|
+
def handle_before_schedule_tick
|
224
|
+
Karafka.monitor.instrument('consumer.before_schedule_tick', caller: self)
|
225
|
+
|
226
|
+
nil
|
227
|
+
end
|
228
|
+
|
229
|
+
# Runs the consumer `#tick` method with reporting
|
230
|
+
def handle_tick
|
231
|
+
Karafka.monitor.instrument('consumer.tick', caller: self)
|
232
|
+
Karafka.monitor.instrument('consumer.ticked', caller: self) do
|
233
|
+
tick
|
234
|
+
end
|
235
|
+
end
|
103
236
|
end
|
104
237
|
end
|
105
238
|
end
|