karafka 2.2.13 → 2.2.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +138 -125
- data/Gemfile.lock +3 -3
- data/docker-compose.yml +2 -0
- data/lib/karafka/admin.rb +109 -3
- data/lib/karafka/app.rb +7 -0
- data/lib/karafka/base_consumer.rb +23 -30
- data/lib/karafka/connection/client.rb +13 -10
- data/lib/karafka/connection/listener.rb +11 -9
- data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
- data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
- data/lib/karafka/instrumentation/logger_listener.rb +0 -9
- data/lib/karafka/instrumentation/notifications.rb +6 -4
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +2 -2
- data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
- data/lib/karafka/pro/loader.rb +2 -2
- data/lib/karafka/pro/processing/schedulers/base.rb +127 -0
- data/lib/karafka/pro/processing/schedulers/default.rb +109 -0
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/default.rb +2 -2
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/vp/default.rb +1 -1
- data/lib/karafka/processing/executor.rb +27 -3
- data/lib/karafka/processing/executors_buffer.rb +3 -3
- data/lib/karafka/processing/jobs/base.rb +19 -2
- data/lib/karafka/processing/jobs/consume.rb +3 -3
- data/lib/karafka/processing/jobs/idle.rb +5 -0
- data/lib/karafka/processing/jobs/revoked.rb +5 -0
- data/lib/karafka/processing/jobs/shutdown.rb +5 -0
- data/lib/karafka/processing/jobs_queue.rb +19 -8
- data/lib/karafka/processing/schedulers/default.rb +41 -0
- data/lib/karafka/processing/strategies/base.rb +13 -4
- data/lib/karafka/processing/strategies/default.rb +17 -7
- data/lib/karafka/processing/worker.rb +4 -1
- data/lib/karafka/routing/proxy.rb +4 -3
- data/lib/karafka/routing/topics.rb +1 -1
- data/lib/karafka/setup/config.rb +4 -1
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +7 -5
- metadata.gz.sig +0 -0
- data/lib/karafka/pro/performance_tracker.rb +0 -84
- data/lib/karafka/pro/processing/scheduler.rb +0 -74
- data/lib/karafka/processing/scheduler.rb +0 -38
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
7
|
+
# repository and their usage requires commercial license agreement.
|
|
8
|
+
#
|
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
10
|
+
#
|
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
12
|
+
# your code to Maciej Mensfeld.
|
|
13
|
+
|
|
14
|
+
module Karafka
|
|
15
|
+
module Pro
|
|
16
|
+
module Processing
|
|
17
|
+
# Namespace for Pro schedulers related components
|
|
18
|
+
module Schedulers
|
|
19
|
+
# Base for all the Pro custom schedulers
|
|
20
|
+
#
|
|
21
|
+
# It wraps the Scheduler API with a mutex to ensure that during scheduling we do not start
|
|
22
|
+
# scheduling other work that could impact the decision making in between multiple
|
|
23
|
+
# subscription groups running in separate threads.
|
|
24
|
+
#
|
|
25
|
+
# @note All the `on_` methods can be redefined with non-thread-safe versions without
|
|
26
|
+
# locks if needed, however when doing so, ensure that your scheduler is stateless.
|
|
27
|
+
class Base
|
|
28
|
+
# @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
|
|
29
|
+
def initialize(queue)
|
|
30
|
+
@queue = queue
|
|
31
|
+
@mutex = Mutex.new
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
# Runs the consumption jobs scheduling flow under a mutex
|
|
35
|
+
#
|
|
36
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
|
|
37
|
+
def on_schedule_consumption(jobs_array)
|
|
38
|
+
@mutex.synchronize do
|
|
39
|
+
schedule_consumption(jobs_array)
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# Should schedule the consumption jobs
|
|
44
|
+
#
|
|
45
|
+
# @param _jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
|
|
46
|
+
def schedule_consumption(_jobs_array)
|
|
47
|
+
raise NotImplementedError, 'Implement in a subclass'
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
# Runs the revocation jobs scheduling flow under a mutex
|
|
51
|
+
#
|
|
52
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
|
|
53
|
+
def on_schedule_revocation(jobs_array)
|
|
54
|
+
@mutex.synchronize do
|
|
55
|
+
schedule_revocation(jobs_array)
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# Schedules the revocation jobs.
|
|
60
|
+
#
|
|
61
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
|
|
62
|
+
#
|
|
63
|
+
# @note We provide a default scheduler logic here because by default revocation jobs
|
|
64
|
+
# should be scheduled as fast as possible.
|
|
65
|
+
def schedule_revocation(jobs_array)
|
|
66
|
+
jobs_array.each do |job|
|
|
67
|
+
@queue << job
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
# Runs the shutdown jobs scheduling flow under a mutex
|
|
72
|
+
#
|
|
73
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
|
|
74
|
+
def on_schedule_shutdown(jobs_array)
|
|
75
|
+
@mutex.synchronize do
|
|
76
|
+
schedule_shutdown(jobs_array)
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# Schedules the shutdown jobs.
|
|
81
|
+
#
|
|
82
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
|
|
83
|
+
#
|
|
84
|
+
# @note We provide a default scheduler logic here because by default shutdown jobs
|
|
85
|
+
# should be scheduled as fast as possible.
|
|
86
|
+
def schedule_shutdown(jobs_array)
|
|
87
|
+
jobs_array.each do |job|
|
|
88
|
+
@queue << job
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
# Runs the manage tick under mutex
|
|
93
|
+
def on_manage
|
|
94
|
+
@mutex.synchronize { manage }
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
# Should manage scheduling on jobs state changes
|
|
98
|
+
#
|
|
99
|
+
# By default does nothing as default schedulers are stateless
|
|
100
|
+
def manage
|
|
101
|
+
nil
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
# Runs clearing under mutex
|
|
105
|
+
#
|
|
106
|
+
# @param group_id [String] Subscription group id
|
|
107
|
+
def on_clear(group_id)
|
|
108
|
+
@mutex.synchronize { clear(group_id) }
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
# By default schedulers are stateless, so nothing to clear.
|
|
112
|
+
#
|
|
113
|
+
# @param _group_id [String] Subscription group id
|
|
114
|
+
def clear(_group_id)
|
|
115
|
+
nil
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
private
|
|
119
|
+
|
|
120
|
+
# @return [Karafka::Processing::JobsQueue] jobs queue reference for internal usage
|
|
121
|
+
# inside of the scheduler
|
|
122
|
+
attr_reader :queue
|
|
123
|
+
end
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
end
|
|
127
|
+
end
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
7
|
+
# repository and their usage requires commercial license agreement.
|
|
8
|
+
#
|
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
10
|
+
#
|
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
12
|
+
# your code to Maciej Mensfeld.
|
|
13
|
+
|
|
14
|
+
module Karafka
|
|
15
|
+
module Pro
|
|
16
|
+
module Processing
|
|
17
|
+
# Namespace for Pro schedulers
|
|
18
|
+
module Schedulers
|
|
19
|
+
# Optimized scheduler that takes into consideration the execution time needed to process
|
|
20
|
+
# messages from given topics partitions. It uses the non-preemptive LJF algorithm
|
|
21
|
+
#
|
|
22
|
+
# This scheduler is designed to optimize execution times on jobs that perform IO operations
|
|
23
|
+
# as when taking IO into consideration, it can achieve optimized parallel processing.
|
|
24
|
+
#
|
|
25
|
+
# This scheduler can also work with virtual partitions.
|
|
26
|
+
#
|
|
27
|
+
# Aside from consumption jobs, other jobs do not run often, thus we can leave them with
|
|
28
|
+
# default FIFO scheduler from the default Karafka scheduler
|
|
29
|
+
#
|
|
30
|
+
# @note This is a stateless scheduler, thus we can override the `#on_` API.
|
|
31
|
+
class Default < Base
|
|
32
|
+
# Schedules jobs in the LJF order for consumption
|
|
33
|
+
#
|
|
34
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
|
|
35
|
+
def on_schedule_consumption(jobs_array)
|
|
36
|
+
perf_tracker = Instrumentation::PerformanceTracker.instance
|
|
37
|
+
|
|
38
|
+
ordered = []
|
|
39
|
+
|
|
40
|
+
jobs_array.each do |job|
|
|
41
|
+
ordered << [
|
|
42
|
+
job,
|
|
43
|
+
processing_cost(perf_tracker, job)
|
|
44
|
+
]
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
ordered.sort_by!(&:last)
|
|
48
|
+
ordered.reverse!
|
|
49
|
+
ordered.map!(&:first)
|
|
50
|
+
|
|
51
|
+
ordered.each do |job|
|
|
52
|
+
@queue << job
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# Schedules jobs in the fifo order
|
|
57
|
+
#
|
|
58
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
|
|
59
|
+
def on_schedule_revocation(jobs_array)
|
|
60
|
+
jobs_array.each do |job|
|
|
61
|
+
@queue << job
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
# Schedules jobs in the fifo order
|
|
66
|
+
#
|
|
67
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
|
|
68
|
+
def on_schedule_shutdown(jobs_array)
|
|
69
|
+
jobs_array.each do |job|
|
|
70
|
+
@queue << job
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
# This scheduler does not have anything to manage as it is a pass through and has no
|
|
75
|
+
# state
|
|
76
|
+
def on_manage
|
|
77
|
+
nil
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# This scheduler does not need to be cleared because it is stateless
|
|
81
|
+
#
|
|
82
|
+
# @param _group_id [String] Subscription group id
|
|
83
|
+
def on_clear(_group_id)
|
|
84
|
+
nil
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
private
|
|
88
|
+
|
|
89
|
+
# @param perf_tracker [PerformanceTracker]
|
|
90
|
+
# @param job [Karafka::Processing::Jobs::Base] job we will be processing
|
|
91
|
+
# @return [Numeric] estimated cost of processing this job
|
|
92
|
+
def processing_cost(perf_tracker, job)
|
|
93
|
+
if job.is_a?(::Karafka::Processing::Jobs::Consume)
|
|
94
|
+
messages = job.messages
|
|
95
|
+
message = messages.first
|
|
96
|
+
|
|
97
|
+
perf_tracker.processing_time_p95(message.topic, message.partition) * messages.size
|
|
98
|
+
else
|
|
99
|
+
# LJF will set first the most expensive, but we want to run the zero cost jobs
|
|
100
|
+
# related to the lifecycle always first. That is why we "emulate" that they
|
|
101
|
+
# are the longest possible jobs that anyone can run
|
|
102
|
+
Float::INFINITY
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
end
|
|
@@ -28,8 +28,8 @@ module Karafka
|
|
|
28
28
|
FEATURES = %i[].freeze
|
|
29
29
|
|
|
30
30
|
# No actions needed for the standard flow here
|
|
31
|
-
def
|
|
32
|
-
Karafka.monitor.instrument('consumer.
|
|
31
|
+
def handle_before_schedule_consume
|
|
32
|
+
Karafka.monitor.instrument('consumer.before_schedule_consume', caller: self)
|
|
33
33
|
|
|
34
34
|
nil
|
|
35
35
|
end
|
|
@@ -112,7 +112,7 @@ module Karafka
|
|
|
112
112
|
#
|
|
113
113
|
# @note This can be done without the mutex, because it happens from the same thread
|
|
114
114
|
# for all the work (listener thread)
|
|
115
|
-
def
|
|
115
|
+
def handle_before_schedule_consume
|
|
116
116
|
super
|
|
117
117
|
|
|
118
118
|
coordinator.virtual_offset_manager.register(
|
|
@@ -11,6 +11,15 @@ module Karafka
|
|
|
11
11
|
#
|
|
12
12
|
# @note Executors are not removed after partition is revoked. They are not that big and will
|
|
13
13
|
# be re-used in case of a re-claim
|
|
14
|
+
#
|
|
15
|
+
# @note Since given consumer can run various operations, executor manages that and its
|
|
16
|
+
# lifecycle. There are the following types of operations with appropriate before/after, etc:
|
|
17
|
+
#
|
|
18
|
+
# - consume - primary operation related to running user consumption code
|
|
19
|
+
# - idle - cleanup job that runs on idle runs where no messages would be passed to the end
|
|
20
|
+
# user. This is used for complex flows with filters, etc
|
|
21
|
+
# - revoked - runs after the partition was revoked
|
|
22
|
+
# - shutdown - runs when process is going to shutdown
|
|
14
23
|
class Executor
|
|
15
24
|
extend Forwardable
|
|
16
25
|
|
|
@@ -39,11 +48,11 @@ module Karafka
|
|
|
39
48
|
end
|
|
40
49
|
|
|
41
50
|
# Allows us to prepare the consumer in the listener thread prior to the job being sent to
|
|
42
|
-
#
|
|
51
|
+
# be scheduled. It also allows to run some code that is time sensitive and cannot wait in the
|
|
43
52
|
# queue as it could cause starvation.
|
|
44
53
|
#
|
|
45
54
|
# @param messages [Array<Karafka::Messages::Message>]
|
|
46
|
-
def
|
|
55
|
+
def before_schedule_consume(messages)
|
|
47
56
|
# Recreate consumer with each batch if persistence is not enabled
|
|
48
57
|
# We reload the consumers with each batch instead of relying on some external signals
|
|
49
58
|
# when needed for consistency. That way devs may have it on or off and not in this
|
|
@@ -60,7 +69,7 @@ module Karafka
|
|
|
60
69
|
Time.now
|
|
61
70
|
)
|
|
62
71
|
|
|
63
|
-
consumer.
|
|
72
|
+
consumer.on_before_schedule_consume
|
|
64
73
|
end
|
|
65
74
|
|
|
66
75
|
# Runs setup and warm-up code in the worker prior to running the consumption
|
|
@@ -79,6 +88,11 @@ module Karafka
|
|
|
79
88
|
consumer.on_after_consume
|
|
80
89
|
end
|
|
81
90
|
|
|
91
|
+
# Runs the code needed before idle work is scheduled
|
|
92
|
+
def before_schedule_idle
|
|
93
|
+
consumer.on_before_schedule_idle
|
|
94
|
+
end
|
|
95
|
+
|
|
82
96
|
# Runs consumer idle operations
|
|
83
97
|
# This may include house-keeping or other state management changes that can occur but that
|
|
84
98
|
# do not mean there are any new messages available for the end user to process
|
|
@@ -96,6 +110,11 @@ module Karafka
|
|
|
96
110
|
consumer.on_idle
|
|
97
111
|
end
|
|
98
112
|
|
|
113
|
+
# Runs code needed before revoked job is scheduled
|
|
114
|
+
def before_schedule_revoked
|
|
115
|
+
consumer.on_before_schedule_revoked if @consumer
|
|
116
|
+
end
|
|
117
|
+
|
|
99
118
|
# Runs the controller `#revoked` method that should be triggered when a given consumer is
|
|
100
119
|
# no longer needed due to partitions reassignment.
|
|
101
120
|
#
|
|
@@ -112,6 +131,11 @@ module Karafka
|
|
|
112
131
|
consumer.on_revoked if @consumer
|
|
113
132
|
end
|
|
114
133
|
|
|
134
|
+
# Runs code needed before shutdown job is scheduled
|
|
135
|
+
def before_schedule_shutdown
|
|
136
|
+
consumer.on_before_schedule_shutdown if @consumer
|
|
137
|
+
end
|
|
138
|
+
|
|
115
139
|
# Runs the controller `#shutdown` method that should be triggered when a given consumer is
|
|
116
140
|
# no longer needed as we're closing the process.
|
|
117
141
|
#
|
|
@@ -54,9 +54,9 @@ module Karafka
|
|
|
54
54
|
# @yieldparam [Integer] partition number
|
|
55
55
|
# @yieldparam [Executor] given executor
|
|
56
56
|
def each
|
|
57
|
-
@buffer.
|
|
58
|
-
partitions.
|
|
59
|
-
executors.
|
|
57
|
+
@buffer.each_value do |partitions|
|
|
58
|
+
partitions.each_value do |executors|
|
|
59
|
+
executors.each_value do |executor|
|
|
60
60
|
yield(executor)
|
|
61
61
|
end
|
|
62
62
|
end
|
|
@@ -20,11 +20,14 @@ module Karafka
|
|
|
20
20
|
# All jobs are blocking by default and they can release the lock when blocking operations
|
|
21
21
|
# are done (if needed)
|
|
22
22
|
@non_blocking = false
|
|
23
|
+
@status = :pending
|
|
23
24
|
end
|
|
24
25
|
|
|
25
|
-
# When redefined can run any code prior to the job being
|
|
26
|
+
# When redefined can run any code prior to the job being scheduled
|
|
26
27
|
# @note This will run in the listener thread and not in the worker
|
|
27
|
-
def
|
|
28
|
+
def before_schedule
|
|
29
|
+
raise NotImplementedError, 'Please implement in a subclass'
|
|
30
|
+
end
|
|
28
31
|
|
|
29
32
|
# When redefined can run any code that should run before executing the proper code
|
|
30
33
|
def before_call; end
|
|
@@ -49,6 +52,20 @@ module Karafka
|
|
|
49
52
|
def non_blocking?
|
|
50
53
|
@non_blocking
|
|
51
54
|
end
|
|
55
|
+
|
|
56
|
+
# @return [Boolean] was this job finished.
|
|
57
|
+
def finished?
|
|
58
|
+
@status == :finished
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# Marks the job as finished. Used by the worker to indicate that this job is done.
|
|
62
|
+
#
|
|
63
|
+
# @note Since the scheduler knows exactly when it schedules jobs and when it keeps them
|
|
64
|
+
# pending, we do not need advanced state tracking and the only information from the
|
|
65
|
+
# "outside" is whether it was finished or not after it was scheduled for execution.
|
|
66
|
+
def finish!
|
|
67
|
+
@status = :finished
|
|
68
|
+
end
|
|
52
69
|
end
|
|
53
70
|
end
|
|
54
71
|
end
|
|
@@ -20,9 +20,9 @@ module Karafka
|
|
|
20
20
|
end
|
|
21
21
|
|
|
22
22
|
# Runs all the preparation code on the executor that needs to happen before the job is
|
|
23
|
-
#
|
|
24
|
-
def
|
|
25
|
-
executor.
|
|
23
|
+
# scheduled.
|
|
24
|
+
def before_schedule
|
|
25
|
+
executor.before_schedule_consume(@messages)
|
|
26
26
|
end
|
|
27
27
|
|
|
28
28
|
# Runs the before consumption preparations on the executor
|
|
@@ -23,12 +23,7 @@ module Karafka
|
|
|
23
23
|
# scheduled by Ruby hundreds of thousands of times per group.
|
|
24
24
|
# We cannot use a single semaphore as it could potentially block in listeners that should
|
|
25
25
|
# process with their data and also could unlock when a given group needs to remain locked
|
|
26
|
-
@semaphores =
|
|
27
|
-
# Ruby prior to 3.2 did not have queue with a timeout on `#pop`, that is why for those
|
|
28
|
-
# versions we use our custom queue wrapper
|
|
29
|
-
h.compute_if_absent(k) { RUBY_VERSION < '3.2' ? TimedQueue.new : Queue.new }
|
|
30
|
-
end
|
|
31
|
-
|
|
26
|
+
@semaphores = {}
|
|
32
27
|
@concurrency = Karafka::App.config.concurrency
|
|
33
28
|
@tick_interval = ::Karafka::App.config.internal.tick_interval
|
|
34
29
|
@in_processing = Hash.new { |h, k| h[k] = [] }
|
|
@@ -37,6 +32,22 @@ module Karafka
|
|
|
37
32
|
@mutex = Mutex.new
|
|
38
33
|
end
|
|
39
34
|
|
|
35
|
+
# Registers given subscription group id in the queue. It is needed so we do not dynamically
|
|
36
|
+
# create semaphore, hence avoiding potential race conditions
|
|
37
|
+
#
|
|
38
|
+
# @param group_id [String]
|
|
39
|
+
def register(group_id)
|
|
40
|
+
# Ruby prior to 3.2 did not have queue with a timeout on `#pop`, that is why for those
|
|
41
|
+
@mutex.synchronize do
|
|
42
|
+
# versions we use our custom queue wrapper
|
|
43
|
+
#
|
|
44
|
+
# Initializes this semaphore from the mutex, so it is never auto-created
|
|
45
|
+
# Since we always schedule a job before waiting using semaphores, there won't be any
|
|
46
|
+
# concurrency problems
|
|
47
|
+
@semaphores[group_id] = RUBY_VERSION < '3.2' ? TimedQueue.new : Queue.new
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
40
51
|
# Adds the job to the internal main queue, scheduling it for execution in a worker and marks
|
|
41
52
|
# this job as in processing pipeline.
|
|
42
53
|
#
|
|
@@ -79,7 +90,7 @@ module Karafka
|
|
|
79
90
|
# @param group_id [String] id of the group we want to unlock for one tick
|
|
80
91
|
# @note This does not release the wait lock. It just causes a conditions recheck
|
|
81
92
|
def tick(group_id)
|
|
82
|
-
@semaphores
|
|
93
|
+
@semaphores.fetch(group_id) << true
|
|
83
94
|
end
|
|
84
95
|
|
|
85
96
|
# Marks a given job from a given group as completed. When there are no more jobs from a given
|
|
@@ -149,7 +160,7 @@ module Karafka
|
|
|
149
160
|
while wait?(group_id)
|
|
150
161
|
yield if block_given?
|
|
151
162
|
|
|
152
|
-
@semaphores
|
|
163
|
+
@semaphores.fetch(group_id).pop(timeout: @tick_interval / 1_000.0)
|
|
153
164
|
end
|
|
154
165
|
end
|
|
155
166
|
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Processing
|
|
5
|
+
# Namespace for Karafka OSS schedulers
|
|
6
|
+
module Schedulers
|
|
7
|
+
# FIFO scheduler for messages coming from various topics and partitions
|
|
8
|
+
class Default
|
|
9
|
+
# @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
|
|
10
|
+
def initialize(queue)
|
|
11
|
+
@queue = queue
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
# Schedules jobs in the fifo order
|
|
15
|
+
#
|
|
16
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
|
|
17
|
+
def on_schedule_consumption(jobs_array)
|
|
18
|
+
jobs_array.each do |job|
|
|
19
|
+
@queue << job
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# Both revocation and shutdown jobs can also run in fifo by default
|
|
24
|
+
alias on_schedule_revocation on_schedule_consumption
|
|
25
|
+
alias on_schedule_shutdown on_schedule_consumption
|
|
26
|
+
|
|
27
|
+
# This scheduler does not have anything to manage as it is a pass through and has no state
|
|
28
|
+
def on_manage
|
|
29
|
+
nil
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
# This scheduler does not need to be cleared because it is stateless
|
|
33
|
+
#
|
|
34
|
+
# @param _group_id [String] Subscription group id
|
|
35
|
+
def on_clear(_group_id)
|
|
36
|
+
nil
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
@@ -11,10 +11,19 @@ module Karafka
|
|
|
11
11
|
module Strategies
|
|
12
12
|
# Base strategy that should be included in each strategy, just to ensure the API
|
|
13
13
|
module Base
|
|
14
|
-
#
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
14
|
+
# Defines all the before schedule handlers for appropriate actions
|
|
15
|
+
%i[
|
|
16
|
+
consume
|
|
17
|
+
idle
|
|
18
|
+
revoked
|
|
19
|
+
shutdown
|
|
20
|
+
].each do |action|
|
|
21
|
+
class_eval <<~RUBY, __FILE__, __LINE__ + 1
|
|
22
|
+
def handle_before_schedule_#{action}
|
|
23
|
+
# What should happen before scheduling this work
|
|
24
|
+
raise NotImplementedError, 'Implement in a subclass'
|
|
25
|
+
end
|
|
26
|
+
RUBY
|
|
18
27
|
end
|
|
19
28
|
|
|
20
29
|
# What should happen before we kick in the processing
|
|
@@ -13,6 +13,23 @@ module Karafka
|
|
|
13
13
|
# Apply strategy for a non-feature based flow
|
|
14
14
|
FEATURES = %i[].freeze
|
|
15
15
|
|
|
16
|
+
# By default on all "before schedule" we just run instrumentation, nothing more
|
|
17
|
+
%i[
|
|
18
|
+
consume
|
|
19
|
+
idle
|
|
20
|
+
revoked
|
|
21
|
+
shutdown
|
|
22
|
+
].each do |action|
|
|
23
|
+
class_eval <<~RUBY, __FILE__, __LINE__ + 1
|
|
24
|
+
# No actions needed for the standard flow here
|
|
25
|
+
def handle_before_schedule_#{action}
|
|
26
|
+
Karafka.monitor.instrument('consumer.before_schedule_#{action}', caller: self)
|
|
27
|
+
|
|
28
|
+
nil
|
|
29
|
+
end
|
|
30
|
+
RUBY
|
|
31
|
+
end
|
|
32
|
+
|
|
16
33
|
# Marks message as consumed in an async way.
|
|
17
34
|
#
|
|
18
35
|
# @param message [Messages::Message] last successfully processed message.
|
|
@@ -76,13 +93,6 @@ module Karafka
|
|
|
76
93
|
commit_offsets(async: false)
|
|
77
94
|
end
|
|
78
95
|
|
|
79
|
-
# No actions needed for the standard flow here
|
|
80
|
-
def handle_before_enqueue
|
|
81
|
-
Karafka.monitor.instrument('consumer.before_enqueue', caller: self)
|
|
82
|
-
|
|
83
|
-
nil
|
|
84
|
-
end
|
|
85
|
-
|
|
86
96
|
# Increment number of attempts
|
|
87
97
|
def handle_before_consume
|
|
88
98
|
coordinator.pause_tracker.increment
|
|
@@ -83,7 +83,10 @@ module Karafka
|
|
|
83
83
|
)
|
|
84
84
|
ensure
|
|
85
85
|
# job can be nil when the queue is being closed
|
|
86
|
-
|
|
86
|
+
if job
|
|
87
|
+
@jobs_queue.complete(job)
|
|
88
|
+
job.finish!
|
|
89
|
+
end
|
|
87
90
|
|
|
88
91
|
# Always publish info, that we completed all the work despite its result
|
|
89
92
|
Karafka.monitor.instrument('worker.completed', instrument_details)
|
|
@@ -10,11 +10,12 @@ module Karafka
|
|
|
10
10
|
# @param target [Object] target object to which we proxy any DSL call
|
|
11
11
|
# @param defaults [Proc] defaults for target that should be applicable after the proper
|
|
12
12
|
# proxy context (if needed)
|
|
13
|
-
# @param block [Proc] block that we want to evaluate in the proxy context
|
|
13
|
+
# @param block [Proc, nil] block that we want to evaluate in the proxy context or nil if no
|
|
14
|
+
# proxy block context for example because whole context is taken from defaults
|
|
14
15
|
def initialize(target, defaults = ->(_) {}, &block)
|
|
15
16
|
@target = target
|
|
16
|
-
instance_eval(&block)
|
|
17
|
-
instance_eval(&defaults)
|
|
17
|
+
instance_eval(&block) if block
|
|
18
|
+
instance_eval(&defaults) if defaults
|
|
18
19
|
end
|
|
19
20
|
|
|
20
21
|
# Ruby 2.7.0 to 2.7.2 do not have arg forwarding, so we fallback to the old way
|
|
@@ -9,7 +9,7 @@ module Karafka
|
|
|
9
9
|
include Enumerable
|
|
10
10
|
extend Forwardable
|
|
11
11
|
|
|
12
|
-
def_delegators :@accumulator, :[], :size, :empty?, :last,
|
|
12
|
+
def_delegators :@accumulator, :[], :size, :empty?, :last, :<<, :map!, :sort_by!, :reverse!
|
|
13
13
|
|
|
14
14
|
# @param topics_array [Array<Karafka::Routing::Topic>] array with topics
|
|
15
15
|
def initialize(topics_array)
|