karafka 2.2.12 → 2.2.14

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +141 -121
  4. data/Gemfile.lock +10 -10
  5. data/config/locales/errors.yml +2 -1
  6. data/docker-compose.yml +2 -0
  7. data/lib/karafka/admin.rb +109 -3
  8. data/lib/karafka/app.rb +7 -0
  9. data/lib/karafka/base_consumer.rb +23 -30
  10. data/lib/karafka/connection/client.rb +13 -10
  11. data/lib/karafka/connection/consumer_group_coordinator.rb +3 -3
  12. data/lib/karafka/connection/listener.rb +18 -10
  13. data/lib/karafka/connection/listeners_batch.rb +6 -1
  14. data/lib/karafka/contracts/config.rb +2 -1
  15. data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
  16. data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
  17. data/lib/karafka/instrumentation/logger_listener.rb +0 -9
  18. data/lib/karafka/instrumentation/notifications.rb +6 -3
  19. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +2 -2
  20. data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
  21. data/lib/karafka/pro/loader.rb +3 -2
  22. data/lib/karafka/pro/processing/coordinator.rb +12 -6
  23. data/lib/karafka/pro/processing/jobs_queue.rb +109 -0
  24. data/lib/karafka/pro/processing/schedulers/base.rb +127 -0
  25. data/lib/karafka/pro/processing/schedulers/default.rb +109 -0
  26. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
  27. data/lib/karafka/pro/processing/strategies/default.rb +3 -1
  28. data/lib/karafka/pro/processing/strategies/lrj/default.rb +10 -1
  29. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
  30. data/lib/karafka/pro/processing/strategies/vp/default.rb +9 -5
  31. data/lib/karafka/processing/coordinator.rb +13 -7
  32. data/lib/karafka/processing/executor.rb +27 -3
  33. data/lib/karafka/processing/executors_buffer.rb +3 -3
  34. data/lib/karafka/processing/jobs/base.rb +19 -2
  35. data/lib/karafka/processing/jobs/consume.rb +3 -3
  36. data/lib/karafka/processing/jobs/idle.rb +5 -0
  37. data/lib/karafka/processing/jobs/revoked.rb +5 -0
  38. data/lib/karafka/processing/jobs/shutdown.rb +5 -0
  39. data/lib/karafka/processing/jobs_queue.rb +45 -17
  40. data/lib/karafka/processing/schedulers/default.rb +41 -0
  41. data/lib/karafka/processing/strategies/base.rb +13 -4
  42. data/lib/karafka/processing/strategies/default.rb +17 -5
  43. data/lib/karafka/processing/worker.rb +4 -1
  44. data/lib/karafka/routing/builder.rb +32 -17
  45. data/lib/karafka/routing/proxy.rb +4 -3
  46. data/lib/karafka/routing/subscription_group.rb +11 -6
  47. data/lib/karafka/routing/topics.rb +1 -1
  48. data/lib/karafka/runner.rb +1 -1
  49. data/lib/karafka/setup/config.rb +5 -1
  50. data/lib/karafka/version.rb +1 -1
  51. data/lib/karafka.rb +0 -1
  52. data.tar.gz.sig +0 -0
  53. metadata +8 -5
  54. metadata.gz.sig +0 -0
  55. data/lib/karafka/pro/performance_tracker.rb +0 -84
  56. data/lib/karafka/pro/processing/scheduler.rb +0 -75
  57. data/lib/karafka/processing/scheduler.rb +0 -22
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ # Namespace for Pro components instrumentation related code
17
+ module Instrumentation
18
+ # Tracker used to keep track of performance metrics
19
+ # It provides insights that can be used to optimize processing flow
20
+ # @note Even if we have some race-conditions here it is relevant due to the quantity of data.
21
+ # This is why we do not mutex it.
22
+ class PerformanceTracker
23
+ include Singleton
24
+
25
+ # How many samples do we collect per topic partition
26
+ SAMPLES_COUNT = 200
27
+
28
+ private_constant :SAMPLES_COUNT
29
+
30
+ # Builds up nested concurrent hash for data tracking
31
+ def initialize
32
+ @processing_times = Hash.new do |topics_hash, topic|
33
+ topics_hash[topic] = Hash.new do |partitions_hash, partition|
34
+ partitions_hash[partition] = []
35
+ end
36
+ end
37
+ end
38
+
39
+ # @param topic [String]
40
+ # @param partition [Integer]
41
+ # @return [Float] p95 processing time of a single message from a single topic partition
42
+ def processing_time_p95(topic, partition)
43
+ values = @processing_times[topic][partition]
44
+
45
+ return 0 if values.empty?
46
+ return values.first if values.size == 1
47
+
48
+ percentile(0.95, values)
49
+ end
50
+
51
+ # @private
52
+ # @param event [Karafka::Core::Monitoring::Event] event details
53
+ # Tracks time taken to process a single message of a given topic partition
54
+ def on_consumer_consumed(event)
55
+ consumer = event[:caller]
56
+ messages = consumer.messages
57
+ topic = messages.metadata.topic
58
+ partition = messages.metadata.partition
59
+
60
+ samples = @processing_times[topic][partition]
61
+ samples << event[:time] / messages.count
62
+
63
+ return unless samples.size > SAMPLES_COUNT
64
+
65
+ samples.shift
66
+ end
67
+
68
+ private
69
+
70
+ # Computes the requested percentile out of provided values
71
+ # @param percentile [Float]
72
+ # @param values [Array<Numeric>] all the values based on which we should compute the percentile
73
+ # @return [Float] computed percentile
74
+ def percentile(percentile, values)
75
+ values_sorted = values.sort
76
+
77
+ floor = (percentile * (values_sorted.length - 1) + 1).floor - 1
78
+ mod = (percentile * (values_sorted.length - 1) + 1).modulo(1)
79
+
80
+ values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
81
+ end
82
+ end
83
+ end
84
+ end
85
+ end
@@ -84,7 +84,8 @@ module Karafka
84
84
 
85
85
  icfg.processing.coordinator_class = Processing::Coordinator
86
86
  icfg.processing.partitioner_class = Processing::Partitioner
87
- icfg.processing.scheduler = Processing::Scheduler.new
87
+ icfg.processing.scheduler_class = Processing::Schedulers::Default
88
+ icfg.processing.jobs_queue_class = Processing::JobsQueue
88
89
  icfg.processing.jobs_builder = Processing::JobsBuilder.new
89
90
  icfg.processing.strategy_selector = Processing::StrategySelector.new
90
91
 
@@ -92,7 +93,7 @@ module Karafka
92
93
  icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
93
94
  icfg.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
94
95
 
95
- config.monitor.subscribe(PerformanceTracker.instance)
96
+ config.monitor.subscribe(Instrumentation::PerformanceTracker.instance)
96
97
  end
97
98
 
98
99
  # Loads the Pro features of Karafka
@@ -21,14 +21,20 @@ module Karafka
21
21
 
22
22
  def_delegators :@collapser, :collapsed?, :collapse_until!
23
23
 
24
- attr_reader :filter, :virtual_offset_manager
24
+ attr_reader :filter, :virtual_offset_manager, :shared_mutex
25
25
 
26
26
  # @param args [Object] anything the base coordinator accepts
27
27
  def initialize(*args)
28
28
  super
29
29
 
30
30
  @executed = []
31
- @flow_lock = Mutex.new
31
+ @flow_mutex = Mutex.new
32
+ # Lock for user code synchronization
33
+ # We do not want to mix coordinator lock with the user lock not to create cases where
34
+ # user imposed lock would lock the internal operations of Karafka
35
+ # This shared lock can be used by the end user as it is not used internally by the
36
+ # framework and can be used for user-facing locking
37
+ @shared_mutex = Mutex.new
32
38
  @collapser = Collapser.new
33
39
  @filter = FiltersApplier.new(self)
34
40
 
@@ -89,7 +95,7 @@ module Karafka
89
95
  # Runs synchronized code once for a collective of virtual partitions prior to work being
90
96
  # enqueued
91
97
  def on_enqueued
92
- @flow_lock.synchronize do
98
+ @flow_mutex.synchronize do
93
99
  return unless executable?(:on_enqueued)
94
100
 
95
101
  yield(@last_message)
@@ -98,7 +104,7 @@ module Karafka
98
104
 
99
105
  # Runs given code only once per all the coordinated jobs upon starting first of them
100
106
  def on_started
101
- @flow_lock.synchronize do
107
+ @flow_mutex.synchronize do
102
108
  return unless executable?(:on_started)
103
109
 
104
110
  yield(@last_message)
@@ -109,7 +115,7 @@ module Karafka
109
115
  # It runs once per all the coordinated jobs and should be used to run any type of post
110
116
  # jobs coordination processing execution
111
117
  def on_finished
112
- @flow_lock.synchronize do
118
+ @flow_mutex.synchronize do
113
119
  return unless finished?
114
120
  return unless executable?(:on_finished)
115
121
 
@@ -119,7 +125,7 @@ module Karafka
119
125
 
120
126
  # Runs once after a partition is revoked
121
127
  def on_revoked
122
- @flow_lock.synchronize do
128
+ @flow_mutex.synchronize do
123
129
  return unless executable?(:on_revoked)
124
130
 
125
131
  yield(@last_message)
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Enhanced processing queue that provides ability to build complex work-distribution
18
+ # schedulers dedicated to particular job types
19
+ #
20
+ # Aside from the OSS queue capabilities it allows for jobless locking for advanced schedulers
21
+ class JobsQueue < Karafka::Processing::JobsQueue
22
+ attr_accessor :in_processing
23
+
24
+ # @return [Karafka::Pro::Processing::JobsQueue]
25
+ def initialize
26
+ super
27
+
28
+ @in_waiting = Hash.new { |h, k| h[k] = [] }
29
+
30
+ @statistics[:waiting] = 0
31
+ end
32
+
33
+ # Method that allows us to lock queue on a given subscription group without enqueuing a
34
+ # job. This can be used when building complex schedulers that want to postpone enqueuing
35
+ # before certain conditions are met.
36
+ #
37
+ # @param job [Jobs::Base] job used for locking
38
+ def lock(job)
39
+ @mutex.synchronize do
40
+ group = @in_waiting[job.group_id]
41
+
42
+ # This should never happen. Same job should not be locked twice
43
+ raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.include?(job)
44
+
45
+ @statistics[:waiting] += 1
46
+
47
+ group << job
48
+ end
49
+ end
50
+
51
+ # Method for unlocking the given subscription group queue space that was locked with a
52
+ # given job that was **not** added to the queue but used via `#lock`.
53
+ #
54
+ # @param job [Jobs::Base] job that locked the queue
55
+ def unlock(job)
56
+ @mutex.synchronize do
57
+ @statistics[:waiting] -= 1
58
+
59
+ return if @in_waiting[job.group_id].delete(job)
60
+
61
+ # This should never happen. It means there was a job being unlocked that was never
62
+ # locked in the first place
63
+ raise(Errors::JobsQueueSynchronizationError, job.group_id)
64
+ end
65
+ end
66
+
67
+ # Clears the processing states for a provided group. Useful when a recovery happens and we
68
+ # need to clean up state but only for a given subscription group.
69
+ #
70
+ # @param group_id [String]
71
+ def clear(group_id)
72
+ @mutex.synchronize do
73
+ @in_processing[group_id].clear
74
+
75
+ @statistics[:waiting] -= @in_waiting[group_id].size
76
+ @in_waiting[group_id].clear
77
+
78
+ # We unlock it just in case it was blocked when clearing started
79
+ tick(group_id)
80
+ end
81
+ end
82
+
83
+ # @param group_id [String]
84
+ #
85
+ # @return [Boolean] tell us if we have anything in the processing (or for processing) from
86
+ # a given group.
87
+ def empty?(group_id)
88
+ @mutex.synchronize do
89
+ @in_processing[group_id].empty? &&
90
+ @in_waiting[group_id].empty?
91
+ end
92
+ end
93
+
94
+ private
95
+
96
+ # @param group_id [String] id of the group in which jobs we're interested.
97
+ # @return [Boolean] should we keep waiting or not
98
+ # @note We do not wait for non-blocking jobs. Their flow should allow for `poll` running
99
+ # as they may exceed `max.poll.interval`
100
+ def wait?(group_id)
101
+ !(
102
+ @in_processing[group_id].all?(&:non_blocking?) &&
103
+ @in_waiting[group_id].all?(&:non_blocking?)
104
+ )
105
+ end
106
+ end
107
+ end
108
+ end
109
+ end
@@ -0,0 +1,127 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Namespace for Pro schedulers related components
18
+ module Schedulers
19
+ # Base for all the Pro custom schedulers
20
+ #
21
+ # It wraps the Scheduler API with mutex to ensure, that during scheduling we do not start
22
+ # scheduling other work that could impact the decision making in between multiple
23
+ # subscription groups running in separate threads.
24
+ #
25
+ # @note All the `on_` methods can be redefined with a non-thread-safe versions without
26
+ # locks if needed, however when doing so, ensure that your scheduler is stateless.
27
+ class Base
28
+ # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
29
+ def initialize(queue)
30
+ @queue = queue
31
+ @mutex = Mutex.new
32
+ end
33
+
34
+ # Runs the consumption jobs scheduling flow under a mutex
35
+ #
36
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
37
+ def on_schedule_consumption(jobs_array)
38
+ @mutex.synchronize do
39
+ schedule_consumption(jobs_array)
40
+ end
41
+ end
42
+
43
+ # Should schedule the consumption jobs
44
+ #
45
+ # @param _jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
46
+ def schedule_consumption(_jobs_array)
47
+ raise NotImplementedError, 'Implement in a subclass'
48
+ end
49
+
50
+ # Runs the revocation jobs scheduling flow under a mutex
51
+ #
52
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
53
+ def on_schedule_revocation(jobs_array)
54
+ @mutex.synchronize do
55
+ schedule_revocation(jobs_array)
56
+ end
57
+ end
58
+
59
+ # Schedules the revocation jobs.
60
+ #
61
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
62
+ #
63
+ # @note We provide a default scheduler logic here because by default revocation jobs
64
+ # should be scheduled as fast as possible.
65
+ def schedule_revocation(jobs_array)
66
+ jobs_array.each do |job|
67
+ @queue << job
68
+ end
69
+ end
70
+
71
+ # Runs the shutdown jobs scheduling flow under a mutex
72
+ #
73
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
74
+ def on_schedule_shutdown(jobs_array)
75
+ @mutex.synchronize do
76
+ schedule_shutdown(jobs_array)
77
+ end
78
+ end
79
+
80
+ # Schedules the shutdown jobs.
81
+ #
82
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
83
+ #
84
+ # @note We provide a default scheduler logic here because by default revocation jobs
85
+ # should be scheduled as fast as possible.
86
+ def schedule_shutdown(jobs_array)
87
+ jobs_array.each do |job|
88
+ @queue << job
89
+ end
90
+ end
91
+
92
+ # Runs the manage tick under mutex
93
+ def on_manage
94
+ @mutex.synchronize { manage }
95
+ end
96
+
97
+ # Should manage scheduling on jobs state changes
98
+ #
99
+ # By default does nothing as default schedulers are stateless
100
+ def manage
101
+ nil
102
+ end
103
+
104
+ # Runs clearing under mutex
105
+ #
106
+ # @param group_id [String] Subscription group id
107
+ def on_clear(group_id)
108
+ @mutex.synchronize { clear(group_id) }
109
+ end
110
+
111
+ # By default schedulers are stateless, so nothing to clear.
112
+ #
113
+ # @param _group_id [String] Subscription group id
114
+ def clear(_group_id)
115
+ nil
116
+ end
117
+
118
+ private
119
+
120
+ # @return [Karafka::Processing::JobsQueue] jobs queue reference for internal usage
121
+ # inside of the scheduler
122
+ attr_reader :queue
123
+ end
124
+ end
125
+ end
126
+ end
127
+ end
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Namespace for Pro schedulers
18
+ module Schedulers
19
+ # Optimized scheduler that takes into consideration the execution time needed to process
20
+ # messages from given topic partitions. It uses the non-preemptive LJF algorithm
21
+ #
22
+ # This scheduler is designed to optimize execution times on jobs that perform IO operations
23
+ # as when taking IO into consideration, it can achieve optimized parallel processing.
24
+ #
25
+ # This scheduler can also work with virtual partitions.
26
+ #
27
+ # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
28
+ # default FIFO scheduler from the default Karafka scheduler
29
+ #
30
+ # @note This is a stateless scheduler, thus we can override the `#on_` API.
31
+ class Default < Base
32
+ # Schedules jobs in the LJF order for consumption
33
+ #
34
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
35
+ def on_schedule_consumption(jobs_array)
36
+ perf_tracker = Instrumentation::PerformanceTracker.instance
37
+
38
+ ordered = []
39
+
40
+ jobs_array.each do |job|
41
+ ordered << [
42
+ job,
43
+ processing_cost(perf_tracker, job)
44
+ ]
45
+ end
46
+
47
+ ordered.sort_by!(&:last)
48
+ ordered.reverse!
49
+ ordered.map!(&:first)
50
+
51
+ ordered.each do |job|
52
+ @queue << job
53
+ end
54
+ end
55
+
56
+ # Schedules jobs in the fifo order
57
+ #
58
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
59
+ def on_schedule_revocation(jobs_array)
60
+ jobs_array.each do |job|
61
+ @queue << job
62
+ end
63
+ end
64
+
65
+ # Schedules jobs in the fifo order
66
+ #
67
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
68
+ def on_schedule_shutdown(jobs_array)
69
+ jobs_array.each do |job|
70
+ @queue << job
71
+ end
72
+ end
73
+
74
+ # This scheduler does not have anything to manage as it is a pass through and has no
75
+ # state
76
+ def on_manage
77
+ nil
78
+ end
79
+
80
+ # This scheduler does not need to be cleared because it is stateless
81
+ #
82
+ # @param _group_id [String] Subscription group id
83
+ def on_clear(_group_id)
84
+ nil
85
+ end
86
+
87
+ private
88
+
89
+ # @param perf_tracker [PerformanceTracker]
90
+ # @param job [Karafka::Processing::Jobs::Base] job we will be processing
91
+ # @return [Numeric] estimated cost of processing this job
92
+ def processing_cost(perf_tracker, job)
93
+ if job.is_a?(::Karafka::Processing::Jobs::Consume)
94
+ messages = job.messages
95
+ message = messages.first
96
+
97
+ perf_tracker.processing_time_p95(message.topic, message.partition) * messages.size
98
+ else
99
+ # LJF will set first the most expensive, but we want to run the zero cost jobs
100
+ # related to the lifecycle always first. That is why we "emulate" that they are
101
+ # the longest possible jobs that anyone can run
102
+ Float::INFINITY
103
+ end
104
+ end
105
+ end
106
+ end
107
+ end
108
+ end
109
+ end
@@ -33,7 +33,7 @@ module Karafka
33
33
  ].freeze
34
34
 
35
35
  # No actions needed for the standard flow here
36
- def handle_before_enqueue
36
+ def handle_before_schedule_consume
37
37
  super
38
38
 
39
39
  coordinator.on_enqueued do
@@ -28,7 +28,9 @@ module Karafka
28
28
  FEATURES = %i[].freeze
29
29
 
30
30
  # No actions needed for the standard flow here
31
- def handle_before_enqueue
31
+ def handle_before_schedule_consume
32
+ Karafka.monitor.instrument('consumer.before_schedule_consume', caller: self)
33
+
32
34
  nil
33
35
  end
34
36
 
@@ -29,7 +29,7 @@ module Karafka
29
29
  ].freeze
30
30
 
31
31
  # We always need to pause prior to doing any jobs for LRJ
32
- def handle_before_enqueue
32
+ def handle_before_schedule_consume
33
33
  super
34
34
 
35
35
  # This ensures that when running LRJ with VP, things operate as expected run only
@@ -77,6 +77,15 @@ module Karafka
77
77
  revoked
78
78
  end
79
79
  end
80
+
81
+ # Allows for LRJ to synchronize its work. It may be needed because LRJ can run
82
+ # lifecycle events like revocation while the LRJ work is running and there may be a
83
+ # need for a critical section.
84
+ #
85
+ # @param block [Proc] block we want to run in a mutex to prevent race-conditions
86
+ def synchronize(&block)
87
+ coordinator.shared_mutex.synchronize(&block)
88
+ end
80
89
  end
81
90
  end
82
91
  end
@@ -29,7 +29,7 @@ module Karafka
29
29
  ].freeze
30
30
 
31
31
  # We always need to pause prior to doing any jobs for LRJ
32
- def handle_before_enqueue
32
+ def handle_before_schedule_consume
33
33
  super
34
34
 
35
35
  # This ensures that when running LRJ with VP, things operate as expected run only
@@ -94,13 +94,15 @@ module Karafka
94
94
 
95
95
  # Allows for cross-virtual-partition consumers locks
96
96
  #
97
- # This is not needed in the non-VP flows because there is always only one consumer
98
- # per partition at the same time, so no coordination is needed directly for the
99
- # end users
97
+ # This is not needed in the non-VP flows except LRJ because there is always only one
98
+ # consumer per partition at the same time, so no coordination is needed directly for
99
+ # the end users. With LRJ it is needed and provided in the `LRJ::Default` strategy,
100
+ # because lifecycle events on revocation can run in parallel to the LRJ job as it is
101
+ # non-blocking.
100
102
  #
101
103
  # @param block [Proc] block we want to run in a mutex to prevent race-conditions
102
104
  def synchronize(&block)
103
- coordinator.synchronize(&block)
105
+ coordinator.shared_mutex.synchronize(&block)
104
106
  end
105
107
 
106
108
  private
@@ -110,7 +112,9 @@ module Karafka
110
112
  #
111
113
  # @note This can be done without the mutex, because it happens from the same thread
112
114
  # for all the work (listener thread)
113
- def handle_before_enqueue
115
+ def handle_before_schedule_consume
116
+ super
117
+
114
118
  coordinator.virtual_offset_manager.register(
115
119
  messages.map(&:offset)
116
120
  )
@@ -162,11 +162,24 @@ module Karafka
162
162
  @manual_seek
163
163
  end
164
164
 
165
+ # @param consumer [Object] karafka consumer (normal or pro)
166
+ # @return [Karafka::Processing::Result] result object which we can use to indicate
167
+ # consumption processing state.
168
+ def consumption(consumer)
169
+ @consumptions[consumer] ||= Processing::Result.new
170
+ end
171
+
165
172
  # Allows to run synchronized (locked) code that can operate only from a given thread
166
173
  #
167
174
  # @param block [Proc] code we want to run in the synchronized mode
175
+ #
168
176
  # @note We check if mutex is not owned already by the current thread so we won't end up with
169
177
  # a deadlock in case user runs coordinated code from inside of his own lock
178
+ #
179
+ # @note This is internal and should **not** be used to synchronize user-facing code.
180
+ # Otherwise user indirectly could cause deadlocks or prolonged locks by running his logic.
181
+ # This can and should however be used for multi-thread strategy applications and other
182
+ # internal operations locks.
170
183
  def synchronize(&block)
171
184
  if @mutex.owned?
172
185
  yield
@@ -174,13 +187,6 @@ module Karafka
174
187
  @mutex.synchronize(&block)
175
188
  end
176
189
  end
177
-
178
- # @param consumer [Object] karafka consumer (normal or pro)
179
- # @return [Karafka::Processing::Result] result object which we can use to indicate
180
- # consumption processing state.
181
- def consumption(consumer)
182
- @consumptions[consumer] ||= Processing::Result.new
183
- end
184
190
  end
185
191
  end
186
192
  end