karafka 2.2.12 → 2.2.14
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +141 -121
- data/Gemfile.lock +10 -10
- data/config/locales/errors.yml +2 -1
- data/docker-compose.yml +2 -0
- data/lib/karafka/admin.rb +109 -3
- data/lib/karafka/app.rb +7 -0
- data/lib/karafka/base_consumer.rb +23 -30
- data/lib/karafka/connection/client.rb +13 -10
- data/lib/karafka/connection/consumer_group_coordinator.rb +3 -3
- data/lib/karafka/connection/listener.rb +18 -10
- data/lib/karafka/connection/listeners_batch.rb +6 -1
- data/lib/karafka/contracts/config.rb +2 -1
- data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
- data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
- data/lib/karafka/instrumentation/logger_listener.rb +0 -9
- data/lib/karafka/instrumentation/notifications.rb +6 -3
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +2 -2
- data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
- data/lib/karafka/pro/loader.rb +3 -2
- data/lib/karafka/pro/processing/coordinator.rb +12 -6
- data/lib/karafka/pro/processing/jobs_queue.rb +109 -0
- data/lib/karafka/pro/processing/schedulers/base.rb +127 -0
- data/lib/karafka/pro/processing/schedulers/default.rb +109 -0
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/default.rb +3 -1
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +10 -1
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/vp/default.rb +9 -5
- data/lib/karafka/processing/coordinator.rb +13 -7
- data/lib/karafka/processing/executor.rb +27 -3
- data/lib/karafka/processing/executors_buffer.rb +3 -3
- data/lib/karafka/processing/jobs/base.rb +19 -2
- data/lib/karafka/processing/jobs/consume.rb +3 -3
- data/lib/karafka/processing/jobs/idle.rb +5 -0
- data/lib/karafka/processing/jobs/revoked.rb +5 -0
- data/lib/karafka/processing/jobs/shutdown.rb +5 -0
- data/lib/karafka/processing/jobs_queue.rb +45 -17
- data/lib/karafka/processing/schedulers/default.rb +41 -0
- data/lib/karafka/processing/strategies/base.rb +13 -4
- data/lib/karafka/processing/strategies/default.rb +17 -5
- data/lib/karafka/processing/worker.rb +4 -1
- data/lib/karafka/routing/builder.rb +32 -17
- data/lib/karafka/routing/proxy.rb +4 -3
- data/lib/karafka/routing/subscription_group.rb +11 -6
- data/lib/karafka/routing/topics.rb +1 -1
- data/lib/karafka/runner.rb +1 -1
- data/lib/karafka/setup/config.rb +5 -1
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +0 -1
- data.tar.gz.sig +0 -0
- metadata +8 -5
- metadata.gz.sig +0 -0
- data/lib/karafka/pro/performance_tracker.rb +0 -84
- data/lib/karafka/pro/processing/scheduler.rb +0 -75
- data/lib/karafka/processing/scheduler.rb +0 -22
@@ -0,0 +1,85 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
7
|
+
# repository and their usage requires commercial license agreement.
|
8
|
+
#
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
10
|
+
#
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
12
|
+
# your code to Maciej Mensfeld.
|
13
|
+
|
14
|
+
module Karafka
|
15
|
+
module Pro
|
16
|
+
# Namespace for Pro components instrumentation related code
|
17
|
+
module Instrumentation
|
18
|
+
# Tracker used to keep track of performance metrics
|
19
|
+
# It provides insights that can be used to optimize processing flow
|
20
|
+
# @note Even if we have some race-conditions here it is irrelevant due to the quantity of data.
|
21
|
+
# This is why we do not mutex it.
|
22
|
+
class PerformanceTracker
|
23
|
+
include Singleton
|
24
|
+
|
25
|
+
# How many samples do we collect per topic partition
|
26
|
+
SAMPLES_COUNT = 200
|
27
|
+
|
28
|
+
private_constant :SAMPLES_COUNT
|
29
|
+
|
30
|
+
# Builds up nested hash (topic => partition => samples) for data tracking
|
31
|
+
def initialize
|
32
|
+
@processing_times = Hash.new do |topics_hash, topic|
|
33
|
+
topics_hash[topic] = Hash.new do |partitions_hash, partition|
|
34
|
+
partitions_hash[partition] = []
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
# @param topic [String]
|
40
|
+
# @param partition [Integer]
|
41
|
+
# @return [Float] p95 processing time of a single message from a single topic partition
|
42
|
+
def processing_time_p95(topic, partition)
|
43
|
+
values = @processing_times[topic][partition]
|
44
|
+
|
45
|
+
return 0 if values.empty?
|
46
|
+
return values.first if values.size == 1
|
47
|
+
|
48
|
+
percentile(0.95, values)
|
49
|
+
end
|
50
|
+
|
51
|
+
# @private
|
52
|
+
# @param event [Karafka::Core::Monitoring::Event] event details
|
53
|
+
# Tracks time taken to process a single message of a given topic partition
|
54
|
+
def on_consumer_consumed(event)
|
55
|
+
consumer = event[:caller]
|
56
|
+
messages = consumer.messages
|
57
|
+
topic = messages.metadata.topic
|
58
|
+
partition = messages.metadata.partition
|
59
|
+
|
60
|
+
samples = @processing_times[topic][partition]
|
61
|
+
samples << event[:time] / messages.count
|
62
|
+
|
63
|
+
return unless samples.size > SAMPLES_COUNT
|
64
|
+
|
65
|
+
samples.shift
|
66
|
+
end
|
67
|
+
|
68
|
+
private
|
69
|
+
|
70
|
+
# Computes the requested percentile out of provided values
|
71
|
+
# @param percentile [Float]
|
72
|
+
# @param values [Array<Numeric>] all the values based on which we should compute the percentile
|
73
|
+
# @return [Float] computed percentile
|
74
|
+
def percentile(percentile, values)
|
75
|
+
values_sorted = values.sort
|
76
|
+
|
77
|
+
floor = (percentile * (values_sorted.length - 1) + 1).floor - 1
|
78
|
+
mod = (percentile * (values_sorted.length - 1) + 1).modulo(1)
|
79
|
+
|
80
|
+
values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
data/lib/karafka/pro/loader.rb
CHANGED
@@ -84,7 +84,8 @@ module Karafka
|
|
84
84
|
|
85
85
|
icfg.processing.coordinator_class = Processing::Coordinator
|
86
86
|
icfg.processing.partitioner_class = Processing::Partitioner
|
87
|
-
icfg.processing.
|
87
|
+
icfg.processing.scheduler_class = Processing::Schedulers::Default
|
88
|
+
icfg.processing.jobs_queue_class = Processing::JobsQueue
|
88
89
|
icfg.processing.jobs_builder = Processing::JobsBuilder.new
|
89
90
|
icfg.processing.strategy_selector = Processing::StrategySelector.new
|
90
91
|
|
@@ -92,7 +93,7 @@ module Karafka
|
|
92
93
|
icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
|
93
94
|
icfg.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
|
94
95
|
|
95
|
-
config.monitor.subscribe(PerformanceTracker.instance)
|
96
|
+
config.monitor.subscribe(Instrumentation::PerformanceTracker.instance)
|
96
97
|
end
|
97
98
|
|
98
99
|
# Loads the Pro features of Karafka
|
@@ -21,14 +21,20 @@ module Karafka
|
|
21
21
|
|
22
22
|
def_delegators :@collapser, :collapsed?, :collapse_until!
|
23
23
|
|
24
|
-
attr_reader :filter, :virtual_offset_manager
|
24
|
+
attr_reader :filter, :virtual_offset_manager, :shared_mutex
|
25
25
|
|
26
26
|
# @param args [Object] anything the base coordinator accepts
|
27
27
|
def initialize(*args)
|
28
28
|
super
|
29
29
|
|
30
30
|
@executed = []
|
31
|
-
@
|
31
|
+
@flow_mutex = Mutex.new
|
32
|
+
# Lock for user code synchronization
|
33
|
+
# We do not want to mix coordinator lock with the user lock not to create cases where
|
34
|
+
# user imposed lock would lock the internal operations of Karafka
|
35
|
+
# This shared lock can be used by the end user as it is not used internally by the
|
36
|
+
# framework and can be used for user-facing locking
|
37
|
+
@shared_mutex = Mutex.new
|
32
38
|
@collapser = Collapser.new
|
33
39
|
@filter = FiltersApplier.new(self)
|
34
40
|
|
@@ -89,7 +95,7 @@ module Karafka
|
|
89
95
|
# Runs synchronized code once for a collective of virtual partitions prior to work being
|
90
96
|
# enqueued
|
91
97
|
def on_enqueued
|
92
|
-
@
|
98
|
+
@flow_mutex.synchronize do
|
93
99
|
return unless executable?(:on_enqueued)
|
94
100
|
|
95
101
|
yield(@last_message)
|
@@ -98,7 +104,7 @@ module Karafka
|
|
98
104
|
|
99
105
|
# Runs given code only once per all the coordinated jobs upon starting first of them
|
100
106
|
def on_started
|
101
|
-
@
|
107
|
+
@flow_mutex.synchronize do
|
102
108
|
return unless executable?(:on_started)
|
103
109
|
|
104
110
|
yield(@last_message)
|
@@ -109,7 +115,7 @@ module Karafka
|
|
109
115
|
# It runs once per all the coordinated jobs and should be used to run any type of post
|
110
116
|
# jobs coordination processing execution
|
111
117
|
def on_finished
|
112
|
-
@
|
118
|
+
@flow_mutex.synchronize do
|
113
119
|
return unless finished?
|
114
120
|
return unless executable?(:on_finished)
|
115
121
|
|
@@ -119,7 +125,7 @@ module Karafka
|
|
119
125
|
|
120
126
|
# Runs once after a partition is revoked
|
121
127
|
def on_revoked
|
122
|
-
@
|
128
|
+
@flow_mutex.synchronize do
|
123
129
|
return unless executable?(:on_revoked)
|
124
130
|
|
125
131
|
yield(@last_message)
|
@@ -0,0 +1,109 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
7
|
+
# repository and their usage requires commercial license agreement.
|
8
|
+
#
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
10
|
+
#
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
12
|
+
# your code to Maciej Mensfeld.
|
13
|
+
|
14
|
+
module Karafka
|
15
|
+
module Pro
|
16
|
+
module Processing
|
17
|
+
# Enhanced processing queue that provides ability to build complex work-distribution
|
18
|
+
# schedulers dedicated to particular job types
|
19
|
+
#
|
20
|
+
# Aside from the OSS queue capabilities it allows for jobless locking for advanced schedulers
|
21
|
+
class JobsQueue < Karafka::Processing::JobsQueue
|
22
|
+
attr_accessor :in_processing
|
23
|
+
|
24
|
+
# @return [Karafka::Pro::Processing::JobsQueue]
|
25
|
+
def initialize
|
26
|
+
super
|
27
|
+
|
28
|
+
@in_waiting = Hash.new { |h, k| h[k] = [] }
|
29
|
+
|
30
|
+
@statistics[:waiting] = 0
|
31
|
+
end
|
32
|
+
|
33
|
+
# Method that allows us to lock queue on a given subscription group without enqueuing a
|
34
|
+
# job. This can be used when building complex schedulers that want to postpone enqueuing
|
35
|
+
# before certain conditions are met.
|
36
|
+
#
|
37
|
+
# @param job [Jobs::Base] job used for locking
|
38
|
+
def lock(job)
|
39
|
+
@mutex.synchronize do
|
40
|
+
group = @in_waiting[job.group_id]
|
41
|
+
|
42
|
+
# This should never happen. Same job should not be locked twice
|
43
|
+
raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.include?(job)
|
44
|
+
|
45
|
+
@statistics[:waiting] += 1
|
46
|
+
|
47
|
+
group << job
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# Method for unlocking the given subscription group queue space that was locked with a
|
52
|
+
# given job that was **not** added to the queue but used via `#lock`.
|
53
|
+
#
|
54
|
+
# @param job [Jobs::Base] job that locked the queue
|
55
|
+
def unlock(job)
|
56
|
+
@mutex.synchronize do
|
57
|
+
@statistics[:waiting] -= 1
|
58
|
+
|
59
|
+
return if @in_waiting[job.group_id].delete(job)
|
60
|
+
|
61
|
+
# This should never happen. It means there was a job being unlocked that was never
|
62
|
+
# locked in the first place
|
63
|
+
raise(Errors::JobsQueueSynchronizationError, job.group_id)
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
# Clears the processing states for a provided group. Useful when a recovery happens and we
|
68
|
+
# need to clean up state but only for a given subscription group.
|
69
|
+
#
|
70
|
+
# @param group_id [String]
|
71
|
+
def clear(group_id)
|
72
|
+
@mutex.synchronize do
|
73
|
+
@in_processing[group_id].clear
|
74
|
+
|
75
|
+
@statistics[:waiting] -= @in_waiting[group_id].size
|
76
|
+
@in_waiting[group_id].clear
|
77
|
+
|
78
|
+
# We unlock it just in case it was blocked when clearing started
|
79
|
+
tick(group_id)
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
# @param group_id [String]
|
84
|
+
#
|
85
|
+
# @return [Boolean] tells us if we have anything in the processing (or for processing) from
|
86
|
+
# a given group.
|
87
|
+
def empty?(group_id)
|
88
|
+
@mutex.synchronize do
|
89
|
+
@in_processing[group_id].empty? &&
|
90
|
+
@in_waiting[group_id].empty?
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
private
|
95
|
+
|
96
|
+
# @param group_id [String] id of the group in which jobs we're interested.
|
97
|
+
# @return [Boolean] should we keep waiting or not
|
98
|
+
# @note We do not wait for non-blocking jobs. Their flow should allow for `poll` running
|
99
|
+
# as they may exceed `max.poll.interval`
|
100
|
+
def wait?(group_id)
|
101
|
+
!(
|
102
|
+
@in_processing[group_id].all?(&:non_blocking?) &&
|
103
|
+
@in_waiting[group_id].all?(&:non_blocking?)
|
104
|
+
)
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
7
|
+
# repository and their usage requires commercial license agreement.
|
8
|
+
#
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
10
|
+
#
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
12
|
+
# your code to Maciej Mensfeld.
|
13
|
+
|
14
|
+
module Karafka
|
15
|
+
module Pro
|
16
|
+
module Processing
|
17
|
+
# Namespace for Pro schedulers related components
|
18
|
+
module Schedulers
|
19
|
+
# Base for all the Pro custom schedulers
|
20
|
+
#
|
21
|
+
# It wraps the Scheduler API with mutex to ensure, that during scheduling we do not start
|
22
|
+
# scheduling other work that could impact the decision making in between multiple
|
23
|
+
# subscription groups running in separate threads.
|
24
|
+
#
|
25
|
+
# @note All the `on_` methods can be redefined with non-thread-safe versions without
|
26
|
+
# locks if needed, however when doing so, ensure that your scheduler is stateless.
|
27
|
+
class Base
|
28
|
+
# @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
|
29
|
+
def initialize(queue)
|
30
|
+
@queue = queue
|
31
|
+
@mutex = Mutex.new
|
32
|
+
end
|
33
|
+
|
34
|
+
# Runs the consumption jobs scheduling flow under a mutex
|
35
|
+
#
|
36
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
|
37
|
+
def on_schedule_consumption(jobs_array)
|
38
|
+
@mutex.synchronize do
|
39
|
+
schedule_consumption(jobs_array)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
# Should schedule the consumption jobs
|
44
|
+
#
|
45
|
+
# @param _jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
|
46
|
+
def schedule_consumption(_jobs_array)
|
47
|
+
raise NotImplementedError, 'Implement in a subclass'
|
48
|
+
end
|
49
|
+
|
50
|
+
# Runs the revocation jobs scheduling flow under a mutex
|
51
|
+
#
|
52
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
|
53
|
+
def on_schedule_revocation(jobs_array)
|
54
|
+
@mutex.synchronize do
|
55
|
+
schedule_revocation(jobs_array)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
# Schedules the revocation jobs.
|
60
|
+
#
|
61
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
|
62
|
+
#
|
63
|
+
# @note We provide a default scheduler logic here because by default revocation jobs
|
64
|
+
# should be scheduled as fast as possible.
|
65
|
+
def schedule_revocation(jobs_array)
|
66
|
+
jobs_array.each do |job|
|
67
|
+
@queue << job
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
# Runs the shutdown jobs scheduling flow under a mutex
|
72
|
+
#
|
73
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
|
74
|
+
def on_schedule_shutdown(jobs_array)
|
75
|
+
@mutex.synchronize do
|
76
|
+
schedule_shutdown(jobs_array)
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
# Schedules the shutdown jobs.
|
81
|
+
#
|
82
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
|
83
|
+
#
|
84
|
+
# @note We provide a default scheduler logic here because by default shutdown jobs
|
85
|
+
# should be scheduled as fast as possible.
|
86
|
+
def schedule_shutdown(jobs_array)
|
87
|
+
jobs_array.each do |job|
|
88
|
+
@queue << job
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
# Runs the manage tick under mutex
|
93
|
+
def on_manage
|
94
|
+
@mutex.synchronize { manage }
|
95
|
+
end
|
96
|
+
|
97
|
+
# Should manage scheduling on jobs state changes
|
98
|
+
#
|
99
|
+
# By default does nothing as default schedulers are stateless
|
100
|
+
def manage
|
101
|
+
nil
|
102
|
+
end
|
103
|
+
|
104
|
+
# Runs clearing under mutex
|
105
|
+
#
|
106
|
+
# @param group_id [String] Subscription group id
|
107
|
+
def on_clear(group_id)
|
108
|
+
@mutex.synchronize { clear(group_id) }
|
109
|
+
end
|
110
|
+
|
111
|
+
# By default schedulers are stateless, so nothing to clear.
|
112
|
+
#
|
113
|
+
# @param _group_id [String] Subscription group id
|
114
|
+
def clear(_group_id)
|
115
|
+
nil
|
116
|
+
end
|
117
|
+
|
118
|
+
private
|
119
|
+
|
120
|
+
# @return [Karafka::Processing::JobsQueue] jobs queue reference for internal usage
|
121
|
+
# inside of the scheduler
|
122
|
+
attr_reader :queue
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
7
|
+
# repository and their usage requires commercial license agreement.
|
8
|
+
#
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
10
|
+
#
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
12
|
+
# your code to Maciej Mensfeld.
|
13
|
+
|
14
|
+
module Karafka
|
15
|
+
module Pro
|
16
|
+
module Processing
|
17
|
+
# Namespace for Pro schedulers
|
18
|
+
module Schedulers
|
19
|
+
# Optimized scheduler that takes into consideration the execution time needed to process
|
20
|
+
# messages from given topics partitions. It uses the non-preemptive LJF algorithm
|
21
|
+
#
|
22
|
+
# This scheduler is designed to optimize execution times on jobs that perform IO operations
|
23
|
+
# as when taking IO into consideration, we can achieve optimized parallel processing.
|
24
|
+
#
|
25
|
+
# This scheduler can also work with virtual partitions.
|
26
|
+
#
|
27
|
+
# Aside from consumption jobs, other jobs do not run often, thus we can leave them with
|
28
|
+
# default FIFO scheduler from the default Karafka scheduler
|
29
|
+
#
|
30
|
+
# @note This is a stateless scheduler, thus we can override the `#on_` API.
|
31
|
+
class Default < Base
|
32
|
+
# Schedules jobs in the LJF order for consumption
|
33
|
+
#
|
34
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
|
35
|
+
def on_schedule_consumption(jobs_array)
|
36
|
+
perf_tracker = Instrumentation::PerformanceTracker.instance
|
37
|
+
|
38
|
+
ordered = []
|
39
|
+
|
40
|
+
jobs_array.each do |job|
|
41
|
+
ordered << [
|
42
|
+
job,
|
43
|
+
processing_cost(perf_tracker, job)
|
44
|
+
]
|
45
|
+
end
|
46
|
+
|
47
|
+
ordered.sort_by!(&:last)
|
48
|
+
ordered.reverse!
|
49
|
+
ordered.map!(&:first)
|
50
|
+
|
51
|
+
ordered.each do |job|
|
52
|
+
@queue << job
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
# Schedules jobs in the fifo order
|
57
|
+
#
|
58
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
|
59
|
+
def on_schedule_revocation(jobs_array)
|
60
|
+
jobs_array.each do |job|
|
61
|
+
@queue << job
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
# Schedules jobs in the fifo order
|
66
|
+
#
|
67
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
|
68
|
+
def on_schedule_shutdown(jobs_array)
|
69
|
+
jobs_array.each do |job|
|
70
|
+
@queue << job
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
# This scheduler does not have anything to manage as it is a pass through and has no
|
75
|
+
# state
|
76
|
+
def on_manage
|
77
|
+
nil
|
78
|
+
end
|
79
|
+
|
80
|
+
# This scheduler does not need to be cleared because it is stateless
|
81
|
+
#
|
82
|
+
# @param _group_id [String] Subscription group id
|
83
|
+
def on_clear(_group_id)
|
84
|
+
nil
|
85
|
+
end
|
86
|
+
|
87
|
+
private
|
88
|
+
|
89
|
+
# @param perf_tracker [PerformanceTracker]
|
90
|
+
# @param job [Karafka::Processing::Jobs::Base] job we will be processing
|
91
|
+
# @return [Numeric] estimated cost of processing this job
|
92
|
+
def processing_cost(perf_tracker, job)
|
93
|
+
if job.is_a?(::Karafka::Processing::Jobs::Consume)
|
94
|
+
messages = job.messages
|
95
|
+
message = messages.first
|
96
|
+
|
97
|
+
perf_tracker.processing_time_p95(message.topic, message.partition) * messages.size
|
98
|
+
else
|
99
|
+
# LJF will set first the most expensive, but we want to run the zero cost jobs
|
100
|
+
# related to the lifecycle always first. That is why we "emulate" that they
|
101
|
+
# are the longest possible jobs that anyone can run
|
102
|
+
Float::INFINITY
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
@@ -28,7 +28,9 @@ module Karafka
|
|
28
28
|
FEATURES = %i[].freeze
|
29
29
|
|
30
30
|
# No actions needed for the standard flow here
|
31
|
-
def
|
31
|
+
def handle_before_schedule_consume
|
32
|
+
Karafka.monitor.instrument('consumer.before_schedule_consume', caller: self)
|
33
|
+
|
32
34
|
nil
|
33
35
|
end
|
34
36
|
|
@@ -29,7 +29,7 @@ module Karafka
|
|
29
29
|
].freeze
|
30
30
|
|
31
31
|
# We always need to pause prior to doing any jobs for LRJ
|
32
|
-
def
|
32
|
+
def handle_before_schedule_consume
|
33
33
|
super
|
34
34
|
|
35
35
|
# This ensures that when running LRJ with VP, things operate as expected run only
|
@@ -77,6 +77,15 @@ module Karafka
|
|
77
77
|
revoked
|
78
78
|
end
|
79
79
|
end
|
80
|
+
|
81
|
+
# Allows for LRJ to synchronize its work. It may be needed because LRJ can run
|
82
|
+
# lifecycle events like revocation while the LRJ work is running and there may be a
|
83
|
+
# need for a critical section.
|
84
|
+
#
|
85
|
+
# @param block [Proc] block we want to run in a mutex to prevent race-conditions
|
86
|
+
def synchronize(&block)
|
87
|
+
coordinator.shared_mutex.synchronize(&block)
|
88
|
+
end
|
80
89
|
end
|
81
90
|
end
|
82
91
|
end
|
@@ -94,13 +94,15 @@ module Karafka
|
|
94
94
|
|
95
95
|
# Allows for cross-virtual-partition consumers locks
|
96
96
|
#
|
97
|
-
# This is not needed in the non-VP flows because there is always only one
|
98
|
-
# per partition at the same time, so no coordination is needed directly for
|
99
|
-
# end users
|
97
|
+
# This is not needed in the non-VP flows except LRJ because there is always only one
|
98
|
+
# consumer per partition at the same time, so no coordination is needed directly for
|
99
|
+
# the end users. With LRJ it is needed and provided in the `LRJ::Default` strategy,
|
100
|
+
# because lifecycle events on revocation can run in parallel to the LRJ job as it is
|
101
|
+
# non-blocking.
|
100
102
|
#
|
101
103
|
# @param block [Proc] block we want to run in a mutex to prevent race-conditions
|
102
104
|
def synchronize(&block)
|
103
|
-
coordinator.synchronize(&block)
|
105
|
+
coordinator.shared_mutex.synchronize(&block)
|
104
106
|
end
|
105
107
|
|
106
108
|
private
|
@@ -110,7 +112,9 @@ module Karafka
|
|
110
112
|
#
|
111
113
|
# @note This can be done without the mutex, because it happens from the same thread
|
112
114
|
# for all the work (listener thread)
|
113
|
-
def
|
115
|
+
def handle_before_schedule_consume
|
116
|
+
super
|
117
|
+
|
114
118
|
coordinator.virtual_offset_manager.register(
|
115
119
|
messages.map(&:offset)
|
116
120
|
)
|
@@ -162,11 +162,24 @@ module Karafka
|
|
162
162
|
@manual_seek
|
163
163
|
end
|
164
164
|
|
165
|
+
# @param consumer [Object] karafka consumer (normal or pro)
|
166
|
+
# @return [Karafka::Processing::Result] result object which we can use to indicate
|
167
|
+
# consumption processing state.
|
168
|
+
def consumption(consumer)
|
169
|
+
@consumptions[consumer] ||= Processing::Result.new
|
170
|
+
end
|
171
|
+
|
165
172
|
# Allows to run synchronized (locked) code that can operate only from a given thread
|
166
173
|
#
|
167
174
|
# @param block [Proc] code we want to run in the synchronized mode
|
175
|
+
#
|
168
176
|
# @note We check if mutex is not owned already by the current thread so we won't end up with
|
169
177
|
# a deadlock in case user runs coordinated code from inside of his own lock
|
178
|
+
#
|
179
|
+
# @note This is internal and should **not** be used to synchronize user-facing code.
|
180
|
+
# Otherwise user indirectly could cause deadlocks or prolonged locks by running his logic.
|
181
|
+
# This can and should however be used for multi-thread strategy applications and other
|
182
|
+
# internal operations locks.
|
170
183
|
def synchronize(&block)
|
171
184
|
if @mutex.owned?
|
172
185
|
yield
|
@@ -174,13 +187,6 @@ module Karafka
|
|
174
187
|
@mutex.synchronize(&block)
|
175
188
|
end
|
176
189
|
end
|
177
|
-
|
178
|
-
# @param consumer [Object] karafka consumer (normal or pro)
|
179
|
-
# @return [Karafka::Processing::Result] result object which we can use to indicate
|
180
|
-
# consumption processing state.
|
181
|
-
def consumption(consumer)
|
182
|
-
@consumptions[consumer] ||= Processing::Result.new
|
183
|
-
end
|
184
190
|
end
|
185
191
|
end
|
186
192
|
end
|