karafka 2.2.13 → 2.3.0.alpha1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +38 -12
- data/.ruby-version +1 -1
- data/CHANGELOG.md +161 -125
- data/Gemfile.lock +12 -12
- data/README.md +0 -2
- data/SECURITY.md +23 -0
- data/config/locales/errors.yml +7 -1
- data/config/locales/pro_errors.yml +22 -0
- data/docker-compose.yml +3 -1
- data/karafka.gemspec +2 -2
- data/lib/karafka/admin/acl.rb +287 -0
- data/lib/karafka/admin.rb +118 -16
- data/lib/karafka/app.rb +12 -3
- data/lib/karafka/base_consumer.rb +32 -31
- data/lib/karafka/cli/base.rb +1 -1
- data/lib/karafka/connection/client.rb +94 -84
- data/lib/karafka/connection/conductor.rb +28 -0
- data/lib/karafka/connection/listener.rb +165 -46
- data/lib/karafka/connection/listeners_batch.rb +5 -11
- data/lib/karafka/connection/manager.rb +72 -0
- data/lib/karafka/connection/messages_buffer.rb +12 -0
- data/lib/karafka/connection/proxy.rb +17 -0
- data/lib/karafka/connection/status.rb +75 -0
- data/lib/karafka/contracts/config.rb +14 -10
- data/lib/karafka/contracts/consumer_group.rb +9 -1
- data/lib/karafka/contracts/topic.rb +3 -1
- data/lib/karafka/errors.rb +13 -0
- data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
- data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
- data/lib/karafka/instrumentation/logger_listener.rb +3 -9
- data/lib/karafka/instrumentation/notifications.rb +19 -9
- data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +31 -28
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +22 -3
- data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +15 -12
- data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +39 -36
- data/lib/karafka/pro/base_consumer.rb +47 -0
- data/lib/karafka/pro/connection/manager.rb +300 -0
- data/lib/karafka/pro/connection/multiplexing/listener.rb +40 -0
- data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
- data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
- data/lib/karafka/pro/iterator.rb +1 -6
- data/lib/karafka/pro/loader.rb +16 -2
- data/lib/karafka/pro/processing/coordinator.rb +2 -1
- data/lib/karafka/pro/processing/executor.rb +37 -0
- data/lib/karafka/pro/processing/expansions_selector.rb +32 -0
- data/lib/karafka/pro/processing/jobs/periodic.rb +41 -0
- data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +32 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +14 -3
- data/lib/karafka/pro/processing/offset_metadata/consumer.rb +44 -0
- data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +131 -0
- data/lib/karafka/pro/processing/offset_metadata/listener.rb +46 -0
- data/lib/karafka/pro/processing/schedulers/base.rb +143 -0
- data/lib/karafka/pro/processing/schedulers/default.rb +107 -0
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/default.rb +136 -3
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +35 -0
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/vp/default.rb +60 -26
- data/lib/karafka/pro/processing/virtual_offset_manager.rb +41 -11
- data/lib/karafka/pro/routing/features/long_running_job/topic.rb +2 -0
- data/lib/karafka/pro/routing/features/multiplexing/config.rb +38 -0
- data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +114 -0
- data/lib/karafka/pro/routing/features/multiplexing/patches/contracts/consumer_group.rb +42 -0
- data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +38 -0
- data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +42 -0
- data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +40 -0
- data/lib/karafka/pro/routing/features/multiplexing.rb +59 -0
- data/lib/karafka/pro/routing/features/non_blocking_job/topic.rb +32 -0
- data/lib/karafka/pro/routing/features/non_blocking_job.rb +37 -0
- data/lib/karafka/pro/routing/features/offset_metadata/config.rb +33 -0
- data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +42 -0
- data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +65 -0
- data/lib/karafka/pro/routing/features/offset_metadata.rb +40 -0
- data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +4 -0
- data/lib/karafka/pro/routing/features/patterns/detector.rb +18 -10
- data/lib/karafka/pro/routing/features/periodic_job/config.rb +37 -0
- data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +44 -0
- data/lib/karafka/pro/routing/features/periodic_job/topic.rb +94 -0
- data/lib/karafka/pro/routing/features/periodic_job.rb +27 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +7 -2
- data/lib/karafka/process.rb +5 -3
- data/lib/karafka/processing/coordinator.rb +5 -1
- data/lib/karafka/processing/executor.rb +43 -13
- data/lib/karafka/processing/executors_buffer.rb +22 -7
- data/lib/karafka/processing/jobs/base.rb +19 -2
- data/lib/karafka/processing/jobs/consume.rb +3 -3
- data/lib/karafka/processing/jobs/idle.rb +5 -0
- data/lib/karafka/processing/jobs/revoked.rb +5 -0
- data/lib/karafka/processing/jobs/shutdown.rb +5 -0
- data/lib/karafka/processing/jobs_queue.rb +19 -8
- data/lib/karafka/processing/schedulers/default.rb +42 -0
- data/lib/karafka/processing/strategies/base.rb +13 -4
- data/lib/karafka/processing/strategies/default.rb +23 -7
- data/lib/karafka/processing/strategies/dlq.rb +36 -0
- data/lib/karafka/processing/worker.rb +4 -1
- data/lib/karafka/routing/builder.rb +12 -2
- data/lib/karafka/routing/consumer_group.rb +5 -5
- data/lib/karafka/routing/features/base.rb +44 -8
- data/lib/karafka/routing/features/dead_letter_queue/config.rb +6 -1
- data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
- data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -2
- data/lib/karafka/routing/proxy.rb +4 -3
- data/lib/karafka/routing/subscription_group.rb +2 -2
- data/lib/karafka/routing/subscription_groups_builder.rb +11 -2
- data/lib/karafka/routing/topic.rb +8 -10
- data/lib/karafka/routing/topics.rb +1 -1
- data/lib/karafka/runner.rb +13 -3
- data/lib/karafka/server.rb +5 -9
- data/lib/karafka/setup/config.rb +21 -1
- data/lib/karafka/status.rb +23 -14
- data/lib/karafka/templates/karafka.rb.erb +7 -0
- data/lib/karafka/time_trackers/partition_usage.rb +56 -0
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +47 -13
- metadata.gz.sig +0 -0
- data/lib/karafka/connection/consumer_group_coordinator.rb +0 -48
- data/lib/karafka/pro/performance_tracker.rb +0 -84
- data/lib/karafka/pro/processing/scheduler.rb +0 -74
- data/lib/karafka/processing/scheduler.rb +0 -38
|
@@ -11,6 +11,15 @@ module Karafka
|
|
|
11
11
|
#
|
|
12
12
|
# @note Executors are not removed after partition is revoked. They are not that big and will
|
|
13
13
|
# be re-used in case of a re-claim
|
|
14
|
+
#
|
|
15
|
+
# @note Since given consumer can run various operations, executor manages that and its
|
|
16
|
+
# lifecycle. There are following types of operations with appropriate before/after, etc:
|
|
17
|
+
#
|
|
18
|
+
# - consume - primary operation related to running user consumption code
|
|
19
|
+
# - idle - cleanup job that runs on idle runs where no messages would be passed to the end
|
|
20
|
+
# user. This is used for complex flows with filters, etc
|
|
21
|
+
# - revoked - runs after the partition was revoked
|
|
22
|
+
# - shutdown - runs when process is going to shutdown
|
|
14
23
|
class Executor
|
|
15
24
|
extend Forwardable
|
|
16
25
|
|
|
@@ -39,11 +48,11 @@ module Karafka
|
|
|
39
48
|
end
|
|
40
49
|
|
|
41
50
|
# Allows us to prepare the consumer in the listener thread prior to the job being sent to
|
|
42
|
-
#
|
|
51
|
+
# be scheduled. It also allows to run some code that is time sensitive and cannot wait in the
|
|
43
52
|
# queue as it could cause starvation.
|
|
44
53
|
#
|
|
45
54
|
# @param messages [Array<Karafka::Messages::Message>]
|
|
46
|
-
def
|
|
55
|
+
def before_schedule_consume(messages)
|
|
47
56
|
# Recreate consumer with each batch if persistence is not enabled
|
|
48
57
|
# We reload the consumers with each batch instead of relying on some external signals
|
|
49
58
|
# when needed for consistency. That way devs may have it on or off and not in this
|
|
@@ -60,7 +69,7 @@ module Karafka
|
|
|
60
69
|
Time.now
|
|
61
70
|
)
|
|
62
71
|
|
|
63
|
-
consumer.
|
|
72
|
+
consumer.on_before_schedule_consume
|
|
64
73
|
end
|
|
65
74
|
|
|
66
75
|
# Runs setup and warm-up code in the worker prior to running the consumption
|
|
@@ -79,23 +88,23 @@ module Karafka
|
|
|
79
88
|
consumer.on_after_consume
|
|
80
89
|
end
|
|
81
90
|
|
|
91
|
+
# Runs the code needed before idle work is scheduled
|
|
92
|
+
def before_schedule_idle
|
|
93
|
+
consumer.on_before_schedule_idle
|
|
94
|
+
end
|
|
95
|
+
|
|
82
96
|
# Runs consumer idle operations
|
|
83
97
|
# This may include house-keeping or other state management changes that can occur but that
|
|
84
98
|
# does not mean there are any new messages available for the end user to process
|
|
85
99
|
def idle
|
|
86
|
-
# Initializes the messages set in case idle operation would happen before any processing
|
|
87
|
-
# This prevents us from having no messages object at all as the messages object and
|
|
88
|
-
# its metadata may be used for statistics
|
|
89
|
-
consumer.messages ||= Messages::Builders::Messages.call(
|
|
90
|
-
[],
|
|
91
|
-
topic,
|
|
92
|
-
partition,
|
|
93
|
-
Time.now
|
|
94
|
-
)
|
|
95
|
-
|
|
96
100
|
consumer.on_idle
|
|
97
101
|
end
|
|
98
102
|
|
|
103
|
+
# Runs code needed before revoked job is scheduled
|
|
104
|
+
def before_schedule_revoked
|
|
105
|
+
consumer.on_before_schedule_revoked if @consumer
|
|
106
|
+
end
|
|
107
|
+
|
|
99
108
|
# Runs the controller `#revoked` method that should be triggered when a given consumer is
|
|
100
109
|
# no longer needed due to partitions reassignment.
|
|
101
110
|
#
|
|
@@ -112,6 +121,11 @@ module Karafka
|
|
|
112
121
|
consumer.on_revoked if @consumer
|
|
113
122
|
end
|
|
114
123
|
|
|
124
|
+
# Runs code needed before shutdown job is scheduled
|
|
125
|
+
def before_schedule_shutdown
|
|
126
|
+
consumer.on_before_schedule_shutdown if @consumer
|
|
127
|
+
end
|
|
128
|
+
|
|
115
129
|
# Runs the controller `#shutdown` method that should be triggered when a given consumer is
|
|
116
130
|
# no longer needed as we're closing the process.
|
|
117
131
|
#
|
|
@@ -146,10 +160,26 @@ module Karafka
|
|
|
146
160
|
consumer.client = @client
|
|
147
161
|
consumer.producer = ::Karafka::App.producer
|
|
148
162
|
consumer.coordinator = @coordinator
|
|
163
|
+
# Since we have some message-less flows (idle, etc), we initialize consumer with empty
|
|
164
|
+
# messages set. In production we have persistent consumers, so this is not a performance
|
|
165
|
+
# overhead as this will happen only once per consumer lifetime
|
|
166
|
+
consumer.messages = empty_messages
|
|
149
167
|
|
|
150
168
|
consumer
|
|
151
169
|
end
|
|
152
170
|
end
|
|
171
|
+
|
|
172
|
+
# Initializes the messages set in case given operation would happen before any processing
|
|
173
|
+
# This prevents us from having no messages object at all as the messages object and
|
|
174
|
+
# its metadata may be used for statistics
|
|
175
|
+
def empty_messages
|
|
176
|
+
Messages::Builders::Messages.call(
|
|
177
|
+
[],
|
|
178
|
+
topic,
|
|
179
|
+
partition,
|
|
180
|
+
Time.now
|
|
181
|
+
)
|
|
182
|
+
end
|
|
153
183
|
end
|
|
154
184
|
end
|
|
155
185
|
end
|
|
@@ -13,6 +13,7 @@ module Karafka
|
|
|
13
13
|
@client = client
|
|
14
14
|
# We need two layers here to keep track of topics, partitions and processing groups
|
|
15
15
|
@buffer = Hash.new { |h, k| h[k] = Hash.new { |h2, k2| h2[k2] = {} } }
|
|
16
|
+
@executor_class = Karafka::App.config.internal.processing.executor_class
|
|
16
17
|
end
|
|
17
18
|
|
|
18
19
|
# Finds or creates an executor based on the provided details
|
|
@@ -21,15 +22,28 @@ module Karafka
|
|
|
21
22
|
# @param partition [Integer] partition number
|
|
22
23
|
# @param parallel_key [String] parallel group key
|
|
23
24
|
# @param coordinator [Karafka::Processing::Coordinator]
|
|
24
|
-
# @return [Executor] consumer executor
|
|
25
|
+
# @return [Executor, Pro::Processing::Executor] consumer executor
|
|
25
26
|
def find_or_create(topic, partition, parallel_key, coordinator)
|
|
26
|
-
@buffer[topic][partition][parallel_key] ||=
|
|
27
|
+
@buffer[topic][partition][parallel_key] ||= @executor_class.new(
|
|
27
28
|
@subscription_group.id,
|
|
28
29
|
@client,
|
|
29
30
|
coordinator
|
|
30
31
|
)
|
|
31
32
|
end
|
|
32
33
|
|
|
34
|
+
# Finds all existing executors for given topic partition or creates one for it
|
|
35
|
+
# @param topic [String] topic name
|
|
36
|
+
# @param partition [Integer] partition number
|
|
37
|
+
# @param coordinator [Karafka::Processing::Coordinator]
|
|
38
|
+
# @return [Array<Executor, Pro::Processing::Executor>]
|
|
39
|
+
def find_all_or_create(topic, partition, coordinator)
|
|
40
|
+
existing = find_all(topic, partition)
|
|
41
|
+
|
|
42
|
+
return existing unless existing.empty?
|
|
43
|
+
|
|
44
|
+
[find_or_create(topic, partition, 0, coordinator)]
|
|
45
|
+
end
|
|
46
|
+
|
|
33
47
|
# Revokes executors of a given topic partition, so they won't be used anymore for incoming
|
|
34
48
|
# messages
|
|
35
49
|
#
|
|
@@ -43,7 +57,8 @@ module Karafka
|
|
|
43
57
|
#
|
|
44
58
|
# @param topic [String] topic name
|
|
45
59
|
# @param partition [Integer] partition number
|
|
46
|
-
# @return [Array<Executor>] executors in use for this
|
|
60
|
+
# @return [Array<Executor, Pro::Processing::Executor>] executors in use for this
|
|
61
|
+
# topic + partition
|
|
47
62
|
def find_all(topic, partition)
|
|
48
63
|
@buffer[topic][partition].values
|
|
49
64
|
end
|
|
@@ -52,11 +67,11 @@ module Karafka
|
|
|
52
67
|
# info
|
|
53
68
|
# @yieldparam [Routing::Topic] karafka routing topic object
|
|
54
69
|
# @yieldparam [Integer] partition number
|
|
55
|
-
# @yieldparam [Executor] given executor
|
|
70
|
+
# @yieldparam [Executor, Pro::Processing::Executor] given executor
|
|
56
71
|
def each
|
|
57
|
-
@buffer.
|
|
58
|
-
partitions.
|
|
59
|
-
executors.
|
|
72
|
+
@buffer.each_value do |partitions|
|
|
73
|
+
partitions.each_value do |executors|
|
|
74
|
+
executors.each_value do |executor|
|
|
60
75
|
yield(executor)
|
|
61
76
|
end
|
|
62
77
|
end
|
|
@@ -20,11 +20,14 @@ module Karafka
|
|
|
20
20
|
# All jobs are blocking by default and they can release the lock when blocking operations
|
|
21
21
|
# are done (if needed)
|
|
22
22
|
@non_blocking = false
|
|
23
|
+
@status = :pending
|
|
23
24
|
end
|
|
24
25
|
|
|
25
|
-
# When redefined can run any code prior to the job being
|
|
26
|
+
# When redefined can run any code prior to the job being scheduled
|
|
26
27
|
# @note This will run in the listener thread and not in the worker
|
|
27
|
-
def
|
|
28
|
+
def before_schedule
|
|
29
|
+
raise NotImplementedError, 'Please implement in a subclass'
|
|
30
|
+
end
|
|
28
31
|
|
|
29
32
|
# When redefined can run any code that should run before executing the proper code
|
|
30
33
|
def before_call; end
|
|
@@ -49,6 +52,20 @@ module Karafka
|
|
|
49
52
|
def non_blocking?
|
|
50
53
|
@non_blocking
|
|
51
54
|
end
|
|
55
|
+
|
|
56
|
+
# @return [Boolean] was this job finished.
|
|
57
|
+
def finished?
|
|
58
|
+
@status == :finished
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# Marks the job as finished. Used by the worker to indicate, that this job is done.
|
|
62
|
+
#
|
|
63
|
+
# @note Since the scheduler knows exactly when it schedules jobs and when it keeps them
|
|
64
|
+
# pending, we do not need advanced state tracking and the only information from the
|
|
65
|
+
# "outside" is whether it was finished or not after it was scheduled for execution.
|
|
66
|
+
def finish!
|
|
67
|
+
@status = :finished
|
|
68
|
+
end
|
|
52
69
|
end
|
|
53
70
|
end
|
|
54
71
|
end
|
|
@@ -20,9 +20,9 @@ module Karafka
|
|
|
20
20
|
end
|
|
21
21
|
|
|
22
22
|
# Runs all the preparation code on the executor that needs to happen before the job is
|
|
23
|
-
#
|
|
24
|
-
def
|
|
25
|
-
executor.
|
|
23
|
+
# scheduled.
|
|
24
|
+
def before_schedule
|
|
25
|
+
executor.before_schedule_consume(@messages)
|
|
26
26
|
end
|
|
27
27
|
|
|
28
28
|
# Runs the before consumption preparations on the executor
|
|
@@ -23,12 +23,7 @@ module Karafka
|
|
|
23
23
|
# scheduled by Ruby hundreds of thousands of times per group.
|
|
24
24
|
# We cannot use a single semaphore as it could potentially block in listeners that should
|
|
25
25
|
# process with their data and also could unlock when a given group needs to remain locked
|
|
26
|
-
@semaphores =
|
|
27
|
-
# Ruby prior to 3.2 did not have queue with a timeout on `#pop`, that is why for those
|
|
28
|
-
# versions we use our custom queue wrapper
|
|
29
|
-
h.compute_if_absent(k) { RUBY_VERSION < '3.2' ? TimedQueue.new : Queue.new }
|
|
30
|
-
end
|
|
31
|
-
|
|
26
|
+
@semaphores = {}
|
|
32
27
|
@concurrency = Karafka::App.config.concurrency
|
|
33
28
|
@tick_interval = ::Karafka::App.config.internal.tick_interval
|
|
34
29
|
@in_processing = Hash.new { |h, k| h[k] = [] }
|
|
@@ -37,6 +32,22 @@ module Karafka
|
|
|
37
32
|
@mutex = Mutex.new
|
|
38
33
|
end
|
|
39
34
|
|
|
35
|
+
# Registers given subscription group id in the queue. It is needed so we do not dynamically
|
|
36
|
+
# create semaphore, hence avoiding potential race conditions
|
|
37
|
+
#
|
|
38
|
+
# @param group_id [String]
|
|
39
|
+
def register(group_id)
|
|
40
|
+
# Ruby prior to 3.2 did not have queue with a timeout on `#pop`, that is why for those
|
|
41
|
+
@mutex.synchronize do
|
|
42
|
+
# versions we use our custom queue wrapper
|
|
43
|
+
#
|
|
44
|
+
# Initializes this semaphore from the mutex, so it is never auto-created
|
|
45
|
+
# Since we always schedule a job before waiting using semaphores, there won't be any
|
|
46
|
+
# concurrency problems
|
|
47
|
+
@semaphores[group_id] = RUBY_VERSION < '3.2' ? TimedQueue.new : Queue.new
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
40
51
|
# Adds the job to the internal main queue, scheduling it for execution in a worker and marks
|
|
41
52
|
# this job as in processing pipeline.
|
|
42
53
|
#
|
|
@@ -79,7 +90,7 @@ module Karafka
|
|
|
79
90
|
# @param group_id [String] id of the group we want to unlock for one tick
|
|
80
91
|
# @note This does not release the wait lock. It just causes a conditions recheck
|
|
81
92
|
def tick(group_id)
|
|
82
|
-
@semaphores
|
|
93
|
+
@semaphores.fetch(group_id) << true
|
|
83
94
|
end
|
|
84
95
|
|
|
85
96
|
# Marks a given job from a given group as completed. When there are no more jobs from a given
|
|
@@ -149,7 +160,7 @@ module Karafka
|
|
|
149
160
|
while wait?(group_id)
|
|
150
161
|
yield if block_given?
|
|
151
162
|
|
|
152
|
-
@semaphores
|
|
163
|
+
@semaphores.fetch(group_id).pop(timeout: @tick_interval / 1_000.0)
|
|
153
164
|
end
|
|
154
165
|
end
|
|
155
166
|
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Processing
|
|
5
|
+
# Namespace for Karafka OSS schedulers
|
|
6
|
+
module Schedulers
|
|
7
|
+
# FIFO scheduler for messages coming from various topics and partitions
|
|
8
|
+
class Default
|
|
9
|
+
# @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
|
|
10
|
+
def initialize(queue)
|
|
11
|
+
@queue = queue
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
# Schedules jobs in the fifo order
|
|
15
|
+
#
|
|
16
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Consume>] jobs we want to schedule
|
|
17
|
+
def on_schedule_consumption(jobs_array)
|
|
18
|
+
jobs_array.each do |job|
|
|
19
|
+
@queue << job
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# Revocation, shutdown and idle jobs can also run in fifo by default
|
|
24
|
+
alias on_schedule_revocation on_schedule_consumption
|
|
25
|
+
alias on_schedule_shutdown on_schedule_consumption
|
|
26
|
+
alias on_schedule_idle on_schedule_consumption
|
|
27
|
+
|
|
28
|
+
# This scheduler does not have anything to manage as it is a pass through and has no state
|
|
29
|
+
def on_manage
|
|
30
|
+
nil
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# This scheduler does not need to be cleared because it is stateless
|
|
34
|
+
#
|
|
35
|
+
# @param _group_id [String] Subscription group id
|
|
36
|
+
def on_clear(_group_id)
|
|
37
|
+
nil
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
@@ -11,10 +11,19 @@ module Karafka
|
|
|
11
11
|
module Strategies
|
|
12
12
|
# Base strategy that should be included in each strategy, just to ensure the API
|
|
13
13
|
module Base
|
|
14
|
-
#
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
14
|
+
# Defines all the before schedule handlers for appropriate actions
|
|
15
|
+
%i[
|
|
16
|
+
consume
|
|
17
|
+
idle
|
|
18
|
+
revoked
|
|
19
|
+
shutdown
|
|
20
|
+
].each do |action|
|
|
21
|
+
class_eval <<~RUBY, __FILE__, __LINE__ + 1
|
|
22
|
+
def handle_before_schedule_#{action}
|
|
23
|
+
# What should happen before scheduling this work
|
|
24
|
+
raise NotImplementedError, 'Implement in a subclass'
|
|
25
|
+
end
|
|
26
|
+
RUBY
|
|
18
27
|
end
|
|
19
28
|
|
|
20
29
|
# What should happen before we kick in the processing
|
|
@@ -13,6 +13,23 @@ module Karafka
|
|
|
13
13
|
# Apply strategy for a non-feature based flow
|
|
14
14
|
FEATURES = %i[].freeze
|
|
15
15
|
|
|
16
|
+
# By default on all "before schedule" we just run instrumentation, nothing more
|
|
17
|
+
%i[
|
|
18
|
+
consume
|
|
19
|
+
idle
|
|
20
|
+
revoked
|
|
21
|
+
shutdown
|
|
22
|
+
].each do |action|
|
|
23
|
+
class_eval <<~RUBY, __FILE__, __LINE__ + 1
|
|
24
|
+
# No actions needed for the standard flow here
|
|
25
|
+
def handle_before_schedule_#{action}
|
|
26
|
+
Karafka.monitor.instrument('consumer.before_schedule_#{action}', caller: self)
|
|
27
|
+
|
|
28
|
+
nil
|
|
29
|
+
end
|
|
30
|
+
RUBY
|
|
31
|
+
end
|
|
32
|
+
|
|
16
33
|
# Marks message as consumed in an async way.
|
|
17
34
|
#
|
|
18
35
|
# @param message [Messages::Message] last successfully processed message.
|
|
@@ -24,6 +41,9 @@ module Karafka
|
|
|
24
41
|
# already processed but rather at the next one. This applies to both sync and async
|
|
25
42
|
# versions of this method.
|
|
26
43
|
def mark_as_consumed(message)
|
|
44
|
+
# seek offset can be nil only in case `#seek` was invoked with offset reset request
|
|
45
|
+
# In case like this we ignore marking
|
|
46
|
+
return true if coordinator.seek_offset.nil?
|
|
27
47
|
# Ignore earlier offsets than the one we already committed
|
|
28
48
|
return true if coordinator.seek_offset > message.offset
|
|
29
49
|
return false if revoked?
|
|
@@ -40,6 +60,9 @@ module Karafka
|
|
|
40
60
|
# @return [Boolean] true if we were able to mark the offset, false otherwise.
|
|
41
61
|
# False indicates that we were not able and that we have lost the partition.
|
|
42
62
|
def mark_as_consumed!(message)
|
|
63
|
+
# seek offset can be nil only in case `#seek` was invoked with offset reset request
|
|
64
|
+
# In case like this we ignore marking
|
|
65
|
+
return true if coordinator.seek_offset.nil?
|
|
43
66
|
# Ignore earlier offsets than the one we already committed
|
|
44
67
|
return true if coordinator.seek_offset > message.offset
|
|
45
68
|
return false if revoked?
|
|
@@ -76,13 +99,6 @@ module Karafka
|
|
|
76
99
|
commit_offsets(async: false)
|
|
77
100
|
end
|
|
78
101
|
|
|
79
|
-
# No actions needed for the standard flow here
|
|
80
|
-
def handle_before_enqueue
|
|
81
|
-
Karafka.monitor.instrument('consumer.before_enqueue', caller: self)
|
|
82
|
-
|
|
83
|
-
nil
|
|
84
|
-
end
|
|
85
|
-
|
|
86
102
|
# Increment number of attempts
|
|
87
103
|
def handle_before_consume
|
|
88
104
|
coordinator.pause_tracker.increment
|
|
@@ -14,6 +14,42 @@ module Karafka
|
|
|
14
14
|
dead_letter_queue
|
|
15
15
|
].freeze
|
|
16
16
|
|
|
17
|
+
# Override of the standard `#mark_as_consumed` in order to handle the pause tracker
|
|
18
|
+
# reset in case DLQ is marked as fully independent. When DLQ is marked independent,
|
|
19
|
+
# any offset marking causes the pause count tracker to reset. This is useful when
|
|
20
|
+
# the error is not due to the collective batch operations state but due to intermediate
|
|
21
|
+
# "crawling" errors that move with it
|
|
22
|
+
#
|
|
23
|
+
# @see `Strategies::Default#mark_as_consumed` for more details
|
|
24
|
+
# @param message [Messages::Message]
|
|
25
|
+
def mark_as_consumed(message)
|
|
26
|
+
# If we are not retrying pause count is already 0, no need to try to reset the state
|
|
27
|
+
return super unless retrying?
|
|
28
|
+
# If we do not use independent marking on DLQ, we just mark as consumed
|
|
29
|
+
return super unless topic.dead_letter_queue.independent?
|
|
30
|
+
# If we were not able to mark no need to reset
|
|
31
|
+
return false unless super
|
|
32
|
+
|
|
33
|
+
coordinator.pause_tracker.reset
|
|
34
|
+
|
|
35
|
+
true
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# Override of the standard `#mark_as_consumed!`. Resets the pause tracker count in case
|
|
39
|
+
# DLQ was configured with the `independent` flag.
|
|
40
|
+
#
|
|
41
|
+
# @see `Strategies::Default#mark_as_consumed!` for more details
|
|
42
|
+
# @param message [Messages::Message]
|
|
43
|
+
def mark_as_consumed!(message)
|
|
44
|
+
return super unless retrying?
|
|
45
|
+
return super unless topic.dead_letter_queue.independent?
|
|
46
|
+
return false unless super
|
|
47
|
+
|
|
48
|
+
coordinator.pause_tracker.reset
|
|
49
|
+
|
|
50
|
+
true
|
|
51
|
+
end
|
|
52
|
+
|
|
17
53
|
# When manual offset management is on, we do not mark anything as consumed automatically
|
|
18
54
|
# and we rely on the user to figure things out
|
|
19
55
|
def handle_after_consume
|
|
@@ -83,7 +83,10 @@ module Karafka
|
|
|
83
83
|
)
|
|
84
84
|
ensure
|
|
85
85
|
# job can be nil when the queue is being closed
|
|
86
|
-
|
|
86
|
+
if job
|
|
87
|
+
@jobs_queue.complete(job)
|
|
88
|
+
job.finish!
|
|
89
|
+
end
|
|
87
90
|
|
|
88
91
|
# Always publish info, that we completed all the work despite its result
|
|
89
92
|
Karafka.monitor.instrument('worker.completed', instrument_details)
|
|
@@ -109,10 +109,20 @@ module Karafka
|
|
|
109
109
|
# subscription group customization
|
|
110
110
|
# @param subscription_group_name [String, Symbol] subscription group id. When not provided,
|
|
111
111
|
# a random uuid will be used
|
|
112
|
+
# @param args [Array] any extra arguments accepted by the subscription group builder
|
|
112
113
|
# @param block [Proc] further topics definitions
|
|
113
|
-
def subscription_group(
|
|
114
|
+
def subscription_group(
|
|
115
|
+
subscription_group_name = SubscriptionGroup.id,
|
|
116
|
+
**args,
|
|
117
|
+
&block
|
|
118
|
+
)
|
|
114
119
|
consumer_group('app') do
|
|
115
|
-
target.public_send(
|
|
120
|
+
target.public_send(
|
|
121
|
+
:subscription_group=,
|
|
122
|
+
subscription_group_name.to_s,
|
|
123
|
+
**args,
|
|
124
|
+
&block
|
|
125
|
+
)
|
|
116
126
|
end
|
|
117
127
|
end
|
|
118
128
|
|
|
@@ -14,7 +14,7 @@ module Karafka
|
|
|
14
14
|
# It allows us to store the "current" subscription group defined in the routing
|
|
15
15
|
# This subscription group id is then injected into topics, so we can compute the subscription
|
|
16
16
|
# groups
|
|
17
|
-
attr_accessor :
|
|
17
|
+
attr_accessor :current_subscription_group_details
|
|
18
18
|
|
|
19
19
|
# @param name [String, Symbol] raw name of this consumer group. Raw means, that it does not
|
|
20
20
|
# yet have an application client_id namespace, this will be added here by default.
|
|
@@ -26,7 +26,7 @@ module Karafka
|
|
|
26
26
|
@topics = Topics.new([])
|
|
27
27
|
# Initialize the subscription group so there's always a value for it, since even if not
|
|
28
28
|
# defined directly, a subscription group will be created
|
|
29
|
-
@
|
|
29
|
+
@current_subscription_group_details = { name: SubscriptionGroup.id }
|
|
30
30
|
end
|
|
31
31
|
|
|
32
32
|
# @return [Boolean] true if this consumer group should be active in our current process
|
|
@@ -48,7 +48,7 @@ module Karafka
|
|
|
48
48
|
built_topic = @topics.last
|
|
49
49
|
# We overwrite it conditionally in case it was not set by the user inline in the topic
|
|
50
50
|
# block definition
|
|
51
|
-
built_topic.
|
|
51
|
+
built_topic.subscription_group_details ||= current_subscription_group_details
|
|
52
52
|
built_topic
|
|
53
53
|
end
|
|
54
54
|
|
|
@@ -59,13 +59,13 @@ module Karafka
|
|
|
59
59
|
def subscription_group=(name = SubscriptionGroup.id, &block)
|
|
60
60
|
# We cast it here, so the routing supports symbol based but that's anyhow later on
|
|
61
61
|
# validated as a string
|
|
62
|
-
@
|
|
62
|
+
@current_subscription_group_details = { name: name.to_s }
|
|
63
63
|
|
|
64
64
|
Proxy.new(self, &block)
|
|
65
65
|
|
|
66
66
|
# We need to reset the current subscription group after it is used, so it won't leak
|
|
67
67
|
# outside to other topics that would be defined without a defined subscription group
|
|
68
|
-
@
|
|
68
|
+
@current_subscription_group_details = { name: SubscriptionGroup.id }
|
|
69
69
|
end
|
|
70
70
|
|
|
71
71
|
# @return [Array<Routing::SubscriptionGroup>] all the subscription groups build based on
|
|
@@ -13,17 +13,46 @@ module Karafka
|
|
|
13
13
|
class << self
|
|
14
14
|
# Extends topic and builder with given feature API
|
|
15
15
|
def activate
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
16
|
+
if const_defined?('Topic', false)
|
|
17
|
+
Topic.prepend(self::Topic)
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
if const_defined?('Topics', false)
|
|
21
|
+
Topics.prepend(self::Topics)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
if const_defined?('ConsumerGroup', false)
|
|
25
|
+
ConsumerGroup.prepend(self::ConsumerGroup)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
if const_defined?('Proxy', false)
|
|
29
|
+
Proxy.prepend(self::Proxy)
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
if const_defined?('Builder', false)
|
|
33
|
+
Builder.prepend(self::Builder)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
if const_defined?('Contracts', false)
|
|
37
|
+
Builder.prepend(Base::Expander.new(self))
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
if const_defined?('SubscriptionGroup', false)
|
|
41
|
+
SubscriptionGroup.prepend(self::SubscriptionGroup)
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
if const_defined?('SubscriptionGroupsBuilder', false)
|
|
45
|
+
SubscriptionGroupsBuilder.prepend(self::SubscriptionGroupsBuilder)
|
|
46
|
+
end
|
|
22
47
|
end
|
|
23
48
|
|
|
24
|
-
# Loads all the features and activates them
|
|
49
|
+
# Loads all the features and activates them once
|
|
25
50
|
def load_all
|
|
51
|
+
return if @loaded
|
|
52
|
+
|
|
26
53
|
features.each(&:activate)
|
|
54
|
+
|
|
55
|
+
@loaded = true
|
|
27
56
|
end
|
|
28
57
|
|
|
29
58
|
# @param config [Karafka::Core::Configurable::Node] app config that we can alter with
|
|
@@ -41,11 +70,18 @@ module Karafka
|
|
|
41
70
|
|
|
42
71
|
private
|
|
43
72
|
|
|
44
|
-
# @return [Array<Class>] all available routing features
|
|
73
|
+
# @return [Array<Class>] all available routing features that are direct descendants of
|
|
74
|
+
# the features base. The approach of using `#superclass` prevents us from accidentally
|
|
75
|
+
# loading Pro components
|
|
45
76
|
def features
|
|
46
77
|
ObjectSpace
|
|
47
78
|
.each_object(Class)
|
|
48
79
|
.select { |klass| klass < self }
|
|
80
|
+
# Ensures, that Pro components are only loaded when we operate in Pro mode. Since
|
|
81
|
+
# outside of specs Zeitwerk does not require them at all, they will not be loaded
|
|
82
|
+
# anyhow, but for specs this needs to be done as RSpec requires all files to be
|
|
83
|
+
# present
|
|
84
|
+
.reject { |klass| Karafka.pro? ? false : klass.superclass != self }
|
|
49
85
|
.sort_by(&:to_s)
|
|
50
86
|
end
|
|
51
87
|
|
|
@@ -11,8 +11,13 @@ module Karafka
|
|
|
11
11
|
:max_retries,
|
|
12
12
|
# To what topic the skipped messages should be moved
|
|
13
13
|
:topic,
|
|
14
|
+
# Should retries be handled collectively on a batch or independently per message
|
|
15
|
+
:independent,
|
|
14
16
|
keyword_init: true
|
|
15
|
-
)
|
|
17
|
+
) do
|
|
18
|
+
alias_method :active?, :active
|
|
19
|
+
alias_method :independent?, :independent
|
|
20
|
+
end
|
|
16
21
|
end
|
|
17
22
|
end
|
|
18
23
|
end
|