karafka 2.2.12 → 2.2.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +141 -121
- data/Gemfile.lock +10 -10
- data/config/locales/errors.yml +2 -1
- data/docker-compose.yml +2 -0
- data/lib/karafka/admin.rb +109 -3
- data/lib/karafka/app.rb +7 -0
- data/lib/karafka/base_consumer.rb +23 -30
- data/lib/karafka/connection/client.rb +13 -10
- data/lib/karafka/connection/consumer_group_coordinator.rb +3 -3
- data/lib/karafka/connection/listener.rb +18 -10
- data/lib/karafka/connection/listeners_batch.rb +6 -1
- data/lib/karafka/contracts/config.rb +2 -1
- data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
- data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
- data/lib/karafka/instrumentation/logger_listener.rb +0 -9
- data/lib/karafka/instrumentation/notifications.rb +6 -3
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +2 -2
- data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
- data/lib/karafka/pro/loader.rb +3 -2
- data/lib/karafka/pro/processing/coordinator.rb +12 -6
- data/lib/karafka/pro/processing/jobs_queue.rb +109 -0
- data/lib/karafka/pro/processing/schedulers/base.rb +127 -0
- data/lib/karafka/pro/processing/schedulers/default.rb +109 -0
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/default.rb +3 -1
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +10 -1
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/vp/default.rb +9 -5
- data/lib/karafka/processing/coordinator.rb +13 -7
- data/lib/karafka/processing/executor.rb +27 -3
- data/lib/karafka/processing/executors_buffer.rb +3 -3
- data/lib/karafka/processing/jobs/base.rb +19 -2
- data/lib/karafka/processing/jobs/consume.rb +3 -3
- data/lib/karafka/processing/jobs/idle.rb +5 -0
- data/lib/karafka/processing/jobs/revoked.rb +5 -0
- data/lib/karafka/processing/jobs/shutdown.rb +5 -0
- data/lib/karafka/processing/jobs_queue.rb +45 -17
- data/lib/karafka/processing/schedulers/default.rb +41 -0
- data/lib/karafka/processing/strategies/base.rb +13 -4
- data/lib/karafka/processing/strategies/default.rb +17 -5
- data/lib/karafka/processing/worker.rb +4 -1
- data/lib/karafka/routing/builder.rb +32 -17
- data/lib/karafka/routing/proxy.rb +4 -3
- data/lib/karafka/routing/subscription_group.rb +11 -6
- data/lib/karafka/routing/topics.rb +1 -1
- data/lib/karafka/runner.rb +1 -1
- data/lib/karafka/setup/config.rb +5 -1
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +0 -1
- data.tar.gz.sig +0 -0
- metadata +8 -5
- metadata.gz.sig +0 -0
- data/lib/karafka/pro/performance_tracker.rb +0 -84
- data/lib/karafka/pro/processing/scheduler.rb +0 -75
- data/lib/karafka/processing/scheduler.rb +0 -22
@@ -0,0 +1,85 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
7
|
+
# repository and their usage requires commercial license agreement.
|
8
|
+
#
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
10
|
+
#
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
12
|
+
# your code to Maciej Mensfeld.
|
13
|
+
|
14
|
+
module Karafka
|
15
|
+
module Pro
|
16
|
+
# Namespace for Pro components instrumentation related code
|
17
|
+
module Instrumentation
|
18
|
+
# Tracker used to keep track of performance metrics
|
19
|
+
# It provides insights that can be used to optimize processing flow
|
20
|
+
# @note Even if we have some race-conditions here it is irrelevant due to the quantity of data.
|
21
|
+
# This is why we do not mutex it.
|
22
|
+
class PerformanceTracker
|
23
|
+
include Singleton
|
24
|
+
|
25
|
+
# How many samples do we collect per topic partition
|
26
|
+
SAMPLES_COUNT = 200
|
27
|
+
|
28
|
+
private_constant :SAMPLES_COUNT
|
29
|
+
|
30
|
+
# Builds up nested concurrent hash for data tracking
|
31
|
+
def initialize
|
32
|
+
@processing_times = Hash.new do |topics_hash, topic|
|
33
|
+
topics_hash[topic] = Hash.new do |partitions_hash, partition|
|
34
|
+
partitions_hash[partition] = []
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
# @param topic [String]
|
40
|
+
# @param partition [Integer]
|
41
|
+
# @return [Float] p95 processing time of a single message from a single topic partition
|
42
|
+
def processing_time_p95(topic, partition)
|
43
|
+
values = @processing_times[topic][partition]
|
44
|
+
|
45
|
+
return 0 if values.empty?
|
46
|
+
return values.first if values.size == 1
|
47
|
+
|
48
|
+
percentile(0.95, values)
|
49
|
+
end
|
50
|
+
|
51
|
+
# @private
|
52
|
+
# @param event [Karafka::Core::Monitoring::Event] event details
|
53
|
+
# Tracks time taken to process a single message of a given topic partition
|
54
|
+
def on_consumer_consumed(event)
|
55
|
+
consumer = event[:caller]
|
56
|
+
messages = consumer.messages
|
57
|
+
topic = messages.metadata.topic
|
58
|
+
partition = messages.metadata.partition
|
59
|
+
|
60
|
+
samples = @processing_times[topic][partition]
|
61
|
+
samples << event[:time] / messages.count
|
62
|
+
|
63
|
+
return unless samples.size > SAMPLES_COUNT
|
64
|
+
|
65
|
+
samples.shift
|
66
|
+
end
|
67
|
+
|
68
|
+
private
|
69
|
+
|
70
|
+
# Computes the requested percentile out of provided values
|
71
|
+
# @param percentile [Float]
|
72
|
+
# @param values [Array<Numeric>] all the values based on which we should compute the percentile
|
73
|
+
# @return [Float] computed percentile
|
74
|
+
def percentile(percentile, values)
|
75
|
+
values_sorted = values.sort
|
76
|
+
|
77
|
+
floor = (percentile * (values_sorted.length - 1) + 1).floor - 1
|
78
|
+
mod = (percentile * (values_sorted.length - 1) + 1).modulo(1)
|
79
|
+
|
80
|
+
values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
data/lib/karafka/pro/loader.rb
CHANGED
@@ -84,7 +84,8 @@ module Karafka
|
|
84
84
|
|
85
85
|
icfg.processing.coordinator_class = Processing::Coordinator
|
86
86
|
icfg.processing.partitioner_class = Processing::Partitioner
|
87
|
-
icfg.processing.
|
87
|
+
icfg.processing.scheduler_class = Processing::Schedulers::Default
|
88
|
+
icfg.processing.jobs_queue_class = Processing::JobsQueue
|
88
89
|
icfg.processing.jobs_builder = Processing::JobsBuilder.new
|
89
90
|
icfg.processing.strategy_selector = Processing::StrategySelector.new
|
90
91
|
|
@@ -92,7 +93,7 @@ module Karafka
|
|
92
93
|
icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
|
93
94
|
icfg.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
|
94
95
|
|
95
|
-
config.monitor.subscribe(PerformanceTracker.instance)
|
96
|
+
config.monitor.subscribe(Instrumentation::PerformanceTracker.instance)
|
96
97
|
end
|
97
98
|
|
98
99
|
# Loads the Pro features of Karafka
|
@@ -21,14 +21,20 @@ module Karafka
|
|
21
21
|
|
22
22
|
def_delegators :@collapser, :collapsed?, :collapse_until!
|
23
23
|
|
24
|
-
attr_reader :filter, :virtual_offset_manager
|
24
|
+
attr_reader :filter, :virtual_offset_manager, :shared_mutex
|
25
25
|
|
26
26
|
# @param args [Object] anything the base coordinator accepts
|
27
27
|
def initialize(*args)
|
28
28
|
super
|
29
29
|
|
30
30
|
@executed = []
|
31
|
-
@
|
31
|
+
@flow_mutex = Mutex.new
|
32
|
+
# Lock for user code synchronization
|
33
|
+
# We do not want to mix coordinator lock with the user lock not to create cases where
|
34
|
+
# user imposed lock would lock the internal operations of Karafka
|
35
|
+
# This shared lock can be used by the end user as it is not used internally by the
|
36
|
+
# framework and can be used for user-facing locking
|
37
|
+
@shared_mutex = Mutex.new
|
32
38
|
@collapser = Collapser.new
|
33
39
|
@filter = FiltersApplier.new(self)
|
34
40
|
|
@@ -89,7 +95,7 @@ module Karafka
|
|
89
95
|
# Runs synchronized code once for a collective of virtual partitions prior to work being
|
90
96
|
# enqueued
|
91
97
|
def on_enqueued
|
92
|
-
@
|
98
|
+
@flow_mutex.synchronize do
|
93
99
|
return unless executable?(:on_enqueued)
|
94
100
|
|
95
101
|
yield(@last_message)
|
@@ -98,7 +104,7 @@ module Karafka
|
|
98
104
|
|
99
105
|
# Runs given code only once per all the coordinated jobs upon starting first of them
|
100
106
|
def on_started
|
101
|
-
@
|
107
|
+
@flow_mutex.synchronize do
|
102
108
|
return unless executable?(:on_started)
|
103
109
|
|
104
110
|
yield(@last_message)
|
@@ -109,7 +115,7 @@ module Karafka
|
|
109
115
|
# It runs once per all the coordinated jobs and should be used to run any type of post
|
110
116
|
# jobs coordination processing execution
|
111
117
|
def on_finished
|
112
|
-
@
|
118
|
+
@flow_mutex.synchronize do
|
113
119
|
return unless finished?
|
114
120
|
return unless executable?(:on_finished)
|
115
121
|
|
@@ -119,7 +125,7 @@ module Karafka
|
|
119
125
|
|
120
126
|
# Runs once after a partition is revoked
|
121
127
|
def on_revoked
|
122
|
-
@
|
128
|
+
@flow_mutex.synchronize do
|
123
129
|
return unless executable?(:on_revoked)
|
124
130
|
|
125
131
|
yield(@last_message)
|
@@ -0,0 +1,109 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
7
|
+
# repository and their usage requires commercial license agreement.
|
8
|
+
#
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
10
|
+
#
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
12
|
+
# your code to Maciej Mensfeld.
|
13
|
+
|
14
|
+
module Karafka
|
15
|
+
module Pro
|
16
|
+
module Processing
|
17
|
+
# Enhanced processing queue that provides ability to build complex work-distribution
|
18
|
+
# schedulers dedicated to particular job types
|
19
|
+
#
|
20
|
+
# Aside from the OSS queue capabilities it allows for jobless locking for advanced schedulers
|
21
|
+
class JobsQueue < Karafka::Processing::JobsQueue
|
22
|
+
attr_accessor :in_processing
|
23
|
+
|
24
|
+
# @return [Karafka::Pro::Processing::JobsQueue]
|
25
|
+
def initialize
|
26
|
+
super
|
27
|
+
|
28
|
+
@in_waiting = Hash.new { |h, k| h[k] = [] }
|
29
|
+
|
30
|
+
@statistics[:waiting] = 0
|
31
|
+
end
|
32
|
+
|
33
|
+
# Method that allows us to lock queue on a given subscription group without enqueuing a
|
34
|
+
# job. This can be used when building complex schedulers that want to postpone enqueuing
|
35
|
+
# before certain conditions are met.
|
36
|
+
#
|
37
|
+
# @param job [Jobs::Base] job used for locking
|
38
|
+
def lock(job)
|
39
|
+
@mutex.synchronize do
|
40
|
+
group = @in_waiting[job.group_id]
|
41
|
+
|
42
|
+
# This should never happen. Same job should not be locked twice
|
43
|
+
raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.include?(job)
|
44
|
+
|
45
|
+
@statistics[:waiting] += 1
|
46
|
+
|
47
|
+
group << job
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# Method for unlocking the given subscription group queue space that was locked with a
|
52
|
+
# given job that was **not** added to the queue but used via `#lock`.
|
53
|
+
#
|
54
|
+
# @param job [Jobs::Base] job that locked the queue
|
55
|
+
def unlock(job)
|
56
|
+
@mutex.synchronize do
|
57
|
+
@statistics[:waiting] -= 1
|
58
|
+
|
59
|
+
return if @in_waiting[job.group_id].delete(job)
|
60
|
+
|
61
|
+
# This should never happen. It means there was a job being unlocked that was never
|
62
|
+
# locked in the first place
|
63
|
+
raise(Errors::JobsQueueSynchronizationError, job.group_id)
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
# Clears the processing states for a provided group. Useful when a recovery happens and we
|
68
|
+
# need to clean up state but only for a given subscription group.
|
69
|
+
#
|
70
|
+
# @param group_id [String]
|
71
|
+
def clear(group_id)
|
72
|
+
@mutex.synchronize do
|
73
|
+
@in_processing[group_id].clear
|
74
|
+
|
75
|
+
@statistics[:waiting] -= @in_waiting[group_id].size
|
76
|
+
@in_waiting[group_id].clear
|
77
|
+
|
78
|
+
# We unlock it just in case it was blocked when clearing started
|
79
|
+
tick(group_id)
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
# @param group_id [String]
|
84
|
+
#
|
85
|
+
# @return [Boolean] tell us if we have anything in the processing (or for processing) from
|
86
|
+
# a given group.
|
87
|
+
def empty?(group_id)
|
88
|
+
@mutex.synchronize do
|
89
|
+
@in_processing[group_id].empty? &&
|
90
|
+
@in_waiting[group_id].empty?
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
private
|
95
|
+
|
96
|
+
# @param group_id [String] id of the group in which jobs we're interested.
|
97
|
+
# @return [Boolean] should we keep waiting or not
|
98
|
+
# @note We do not wait for non-blocking jobs. Their flow should allow for `poll` running
|
99
|
+
# as they may exceed `max.poll.interval`
|
100
|
+
def wait?(group_id)
|
101
|
+
!(
|
102
|
+
@in_processing[group_id].all?(&:non_blocking?) &&
|
103
|
+
@in_waiting[group_id].all?(&:non_blocking?)
|
104
|
+
)
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
7
|
+
# repository and their usage requires commercial license agreement.
|
8
|
+
#
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
10
|
+
#
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
12
|
+
# your code to Maciej Mensfeld.
|
13
|
+
|
14
|
+
module Karafka
|
15
|
+
module Pro
|
16
|
+
module Processing
|
17
|
+
# Namespace for Pro schedulers related components
|
18
|
+
module Schedulers
|
19
|
+
# Base for all the Pro custom schedulers
|
20
|
+
#
|
21
|
+
# It wraps the Scheduler API with mutex to ensure, that during scheduling we do not start
|
22
|
+
# scheduling other work that could impact the decision making in between multiple
|
23
|
+
# subscription groups running in separate threads.
|
24
|
+
#
|
25
|
+
# @note All the `on_` methods can be redefined with a non-thread-safe versions without
|
26
|
+
# locks if needed, however when doing so, ensure that your scheduler is stateless.
|
27
|
+
class Base
|
28
|
+
# @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
|
29
|
+
def initialize(queue)
|
30
|
+
@queue = queue
|
31
|
+
@mutex = Mutex.new
|
32
|
+
end
|
33
|
+
|
34
|
+
# Runs the consumption jobs scheduling flow under a mutex
|
35
|
+
#
|
36
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
|
37
|
+
def on_schedule_consumption(jobs_array)
|
38
|
+
@mutex.synchronize do
|
39
|
+
schedule_consumption(jobs_array)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
# Should schedule the consumption jobs
|
44
|
+
#
|
45
|
+
# @param _jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
|
46
|
+
def schedule_consumption(_jobs_array)
|
47
|
+
raise NotImplementedError, 'Implement in a subclass'
|
48
|
+
end
|
49
|
+
|
50
|
+
# Runs the revocation jobs scheduling flow under a mutex
|
51
|
+
#
|
52
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
|
53
|
+
def on_schedule_revocation(jobs_array)
|
54
|
+
@mutex.synchronize do
|
55
|
+
schedule_revocation(jobs_array)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
# Schedules the revocation jobs.
|
60
|
+
#
|
61
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
|
62
|
+
#
|
63
|
+
# @note We provide a default scheduler logic here because by default revocation jobs
|
64
|
+
# should be scheduled as fast as possible.
|
65
|
+
def schedule_revocation(jobs_array)
|
66
|
+
jobs_array.each do |job|
|
67
|
+
@queue << job
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
# Runs the shutdown jobs scheduling flow under a mutex
|
72
|
+
#
|
73
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
|
74
|
+
def on_schedule_shutdown(jobs_array)
|
75
|
+
@mutex.synchronize do
|
76
|
+
schedule_shutdown(jobs_array)
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
# Schedules the shutdown jobs.
|
81
|
+
#
|
82
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
|
83
|
+
#
|
84
|
+
# @note We provide a default scheduler logic here because by default shutdown jobs
|
85
|
+
# should be scheduled as fast as possible.
|
86
|
+
def schedule_shutdown(jobs_array)
|
87
|
+
jobs_array.each do |job|
|
88
|
+
@queue << job
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
# Runs the manage tick under mutex
|
93
|
+
def on_manage
|
94
|
+
@mutex.synchronize { manage }
|
95
|
+
end
|
96
|
+
|
97
|
+
# Should manage scheduling on jobs state changes
|
98
|
+
#
|
99
|
+
# By default does nothing as default schedulers are stateless
|
100
|
+
def manage
|
101
|
+
nil
|
102
|
+
end
|
103
|
+
|
104
|
+
# Runs clearing under mutex
|
105
|
+
#
|
106
|
+
# @param group_id [String] Subscription group id
|
107
|
+
def on_clear(group_id)
|
108
|
+
@mutex.synchronize { clear(group_id) }
|
109
|
+
end
|
110
|
+
|
111
|
+
# By default schedulers are stateless, so nothing to clear.
|
112
|
+
#
|
113
|
+
# @param _group_id [String] Subscription group id
|
114
|
+
def clear(_group_id)
|
115
|
+
nil
|
116
|
+
end
|
117
|
+
|
118
|
+
private
|
119
|
+
|
120
|
+
# @return [Karafka::Processing::JobsQueue] jobs queue reference for internal usage
|
121
|
+
# inside of the scheduler
|
122
|
+
attr_reader :queue
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
7
|
+
# repository and their usage requires commercial license agreement.
|
8
|
+
#
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
10
|
+
#
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
12
|
+
# your code to Maciej Mensfeld.
|
13
|
+
|
14
|
+
module Karafka
|
15
|
+
module Pro
|
16
|
+
module Processing
|
17
|
+
# Namespace for Pro schedulers
|
18
|
+
module Schedulers
|
19
|
+
# Optimized scheduler that takes into consideration the execution time needed to process
|
20
|
+
# messages from given topics partitions. It uses the non-preemptive LJF algorithm
|
21
|
+
#
|
22
|
+
# This scheduler is designed to optimize execution times on jobs that perform IO operations
|
23
|
+
# as when taking IO into consideration, it can achieve optimized parallel processing.
|
24
|
+
#
|
25
|
+
# This scheduler can also work with virtual partitions.
|
26
|
+
#
|
27
|
+
# Aside from consumption jobs, other jobs do not run often, thus we can leave them with
|
28
|
+
# default FIFO scheduler from the default Karafka scheduler
|
29
|
+
#
|
30
|
+
# @note This is a stateless scheduler, thus we can override the `#on_` API.
|
31
|
+
class Default < Base
|
32
|
+
# Schedules jobs in the LJF order for consumption
|
33
|
+
#
|
34
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
|
35
|
+
def on_schedule_consumption(jobs_array)
|
36
|
+
perf_tracker = Instrumentation::PerformanceTracker.instance
|
37
|
+
|
38
|
+
ordered = []
|
39
|
+
|
40
|
+
jobs_array.each do |job|
|
41
|
+
ordered << [
|
42
|
+
job,
|
43
|
+
processing_cost(perf_tracker, job)
|
44
|
+
]
|
45
|
+
end
|
46
|
+
|
47
|
+
ordered.sort_by!(&:last)
|
48
|
+
ordered.reverse!
|
49
|
+
ordered.map!(&:first)
|
50
|
+
|
51
|
+
ordered.each do |job|
|
52
|
+
@queue << job
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
# Schedules jobs in the fifo order
|
57
|
+
#
|
58
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
|
59
|
+
def on_schedule_revocation(jobs_array)
|
60
|
+
jobs_array.each do |job|
|
61
|
+
@queue << job
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
# Schedules jobs in the fifo order
|
66
|
+
#
|
67
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
|
68
|
+
def on_schedule_shutdown(jobs_array)
|
69
|
+
jobs_array.each do |job|
|
70
|
+
@queue << job
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
# This scheduler does not have anything to manage as it is a pass through and has no
|
75
|
+
# state
|
76
|
+
def on_manage
|
77
|
+
nil
|
78
|
+
end
|
79
|
+
|
80
|
+
# This scheduler does not need to be cleared because it is stateless
|
81
|
+
#
|
82
|
+
# @param _group_id [String] Subscription group id
|
83
|
+
def on_clear(_group_id)
|
84
|
+
nil
|
85
|
+
end
|
86
|
+
|
87
|
+
private
|
88
|
+
|
89
|
+
# @param perf_tracker [PerformanceTracker]
|
90
|
+
# @param job [Karafka::Processing::Jobs::Base] job we will be processing
|
91
|
+
# @return [Numeric] estimated cost of processing this job
|
92
|
+
def processing_cost(perf_tracker, job)
|
93
|
+
if job.is_a?(::Karafka::Processing::Jobs::Consume)
|
94
|
+
messages = job.messages
|
95
|
+
message = messages.first
|
96
|
+
|
97
|
+
perf_tracker.processing_time_p95(message.topic, message.partition) * messages.size
|
98
|
+
else
|
99
|
+
# LJF will set first the most expensive, but we want to run the zero cost jobs
|
100
|
+
# related to the lifecycle always first. That is why we "emulate" that they are
|
101
|
+
# the longest possible jobs that anyone can run
|
102
|
+
Float::INFINITY
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
@@ -28,7 +28,9 @@ module Karafka
|
|
28
28
|
FEATURES = %i[].freeze
|
29
29
|
|
30
30
|
# No actions needed for the standard flow here
|
31
|
-
def
|
31
|
+
def handle_before_schedule_consume
|
32
|
+
Karafka.monitor.instrument('consumer.before_schedule_consume', caller: self)
|
33
|
+
|
32
34
|
nil
|
33
35
|
end
|
34
36
|
|
@@ -29,7 +29,7 @@ module Karafka
|
|
29
29
|
].freeze
|
30
30
|
|
31
31
|
# We always need to pause prior to doing any jobs for LRJ
|
32
|
-
def
|
32
|
+
def handle_before_schedule_consume
|
33
33
|
super
|
34
34
|
|
35
35
|
# This ensures that when running LRJ with VP, things operate as expected run only
|
@@ -77,6 +77,15 @@ module Karafka
|
|
77
77
|
revoked
|
78
78
|
end
|
79
79
|
end
|
80
|
+
|
81
|
+
# Allows for LRJ to synchronize its work. It may be needed because LRJ can run
|
82
|
+
# lifecycle events like revocation while the LRJ work is running and there may be a
|
83
|
+
# need for a critical section.
|
84
|
+
#
|
85
|
+
# @param block [Proc] block we want to run in a mutex to prevent race-conditions
|
86
|
+
def synchronize(&block)
|
87
|
+
coordinator.shared_mutex.synchronize(&block)
|
88
|
+
end
|
80
89
|
end
|
81
90
|
end
|
82
91
|
end
|
@@ -94,13 +94,15 @@ module Karafka
|
|
94
94
|
|
95
95
|
# Allows for cross-virtual-partition consumers locks
|
96
96
|
#
|
97
|
-
# This is not needed in the non-VP flows because there is always only one
|
98
|
-
# per partition at the same time, so no coordination is needed directly for
|
99
|
-
# end users
|
97
|
+
# This is not needed in the non-VP flows except LRJ because there is always only one
|
98
|
+
# consumer per partition at the same time, so no coordination is needed directly for
|
99
|
+
# the end users. With LRJ it is needed and provided in the `LRJ::Default` strategy,
|
100
|
+
# because lifecycle events on revocation can run in parallel to the LRJ job as it is
|
101
|
+
# non-blocking.
|
100
102
|
#
|
101
103
|
# @param block [Proc] block we want to run in a mutex to prevent race-conditions
|
102
104
|
def synchronize(&block)
|
103
|
-
coordinator.synchronize(&block)
|
105
|
+
coordinator.shared_mutex.synchronize(&block)
|
104
106
|
end
|
105
107
|
|
106
108
|
private
|
@@ -110,7 +112,9 @@ module Karafka
|
|
110
112
|
#
|
111
113
|
# @note This can be done without the mutex, because it happens from the same thread
|
112
114
|
# for all the work (listener thread)
|
113
|
-
def
|
115
|
+
def handle_before_schedule_consume
|
116
|
+
super
|
117
|
+
|
114
118
|
coordinator.virtual_offset_manager.register(
|
115
119
|
messages.map(&:offset)
|
116
120
|
)
|
@@ -162,11 +162,24 @@ module Karafka
|
|
162
162
|
@manual_seek
|
163
163
|
end
|
164
164
|
|
165
|
+
# @param consumer [Object] karafka consumer (normal or pro)
|
166
|
+
# @return [Karafka::Processing::Result] result object which we can use to indicate
|
167
|
+
# consumption processing state.
|
168
|
+
def consumption(consumer)
|
169
|
+
@consumptions[consumer] ||= Processing::Result.new
|
170
|
+
end
|
171
|
+
|
165
172
|
# Allows to run synchronized (locked) code that can operate only from a given thread
|
166
173
|
#
|
167
174
|
# @param block [Proc] code we want to run in the synchronized mode
|
175
|
+
#
|
168
176
|
# @note We check if mutex is not owned already by the current thread so we won't end up with
|
169
177
|
# a deadlock in case user runs coordinated code from inside of his own lock
|
178
|
+
#
|
179
|
+
# @note This is internal and should **not** be used to synchronize user-facing code.
|
180
|
+
# Otherwise user indirectly could cause deadlocks or prolonged locks by running his logic.
|
181
|
+
# This can and should however be used for multi-thread strategy applications and other
|
182
|
+
# internal operations locks.
|
170
183
|
def synchronize(&block)
|
171
184
|
if @mutex.owned?
|
172
185
|
yield
|
@@ -174,13 +187,6 @@ module Karafka
|
|
174
187
|
@mutex.synchronize(&block)
|
175
188
|
end
|
176
189
|
end
|
177
|
-
|
178
|
-
# @param consumer [Object] karafka consumer (normal or pro)
|
179
|
-
# @return [Karafka::Processing::Result] result object which we can use to indicate
|
180
|
-
# consumption processing state.
|
181
|
-
def consumption(consumer)
|
182
|
-
@consumptions[consumer] ||= Processing::Result.new
|
183
|
-
end
|
184
190
|
end
|
185
191
|
end
|
186
192
|
end
|