karafka 2.2.13 → 2.3.0.alpha1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +38 -12
- data/.ruby-version +1 -1
- data/CHANGELOG.md +161 -125
- data/Gemfile.lock +12 -12
- data/README.md +0 -2
- data/SECURITY.md +23 -0
- data/config/locales/errors.yml +7 -1
- data/config/locales/pro_errors.yml +22 -0
- data/docker-compose.yml +3 -1
- data/karafka.gemspec +2 -2
- data/lib/karafka/admin/acl.rb +287 -0
- data/lib/karafka/admin.rb +118 -16
- data/lib/karafka/app.rb +12 -3
- data/lib/karafka/base_consumer.rb +32 -31
- data/lib/karafka/cli/base.rb +1 -1
- data/lib/karafka/connection/client.rb +94 -84
- data/lib/karafka/connection/conductor.rb +28 -0
- data/lib/karafka/connection/listener.rb +165 -46
- data/lib/karafka/connection/listeners_batch.rb +5 -11
- data/lib/karafka/connection/manager.rb +72 -0
- data/lib/karafka/connection/messages_buffer.rb +12 -0
- data/lib/karafka/connection/proxy.rb +17 -0
- data/lib/karafka/connection/status.rb +75 -0
- data/lib/karafka/contracts/config.rb +14 -10
- data/lib/karafka/contracts/consumer_group.rb +9 -1
- data/lib/karafka/contracts/topic.rb +3 -1
- data/lib/karafka/errors.rb +13 -0
- data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
- data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
- data/lib/karafka/instrumentation/logger_listener.rb +3 -9
- data/lib/karafka/instrumentation/notifications.rb +19 -9
- data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +31 -28
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +22 -3
- data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +15 -12
- data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +39 -36
- data/lib/karafka/pro/base_consumer.rb +47 -0
- data/lib/karafka/pro/connection/manager.rb +300 -0
- data/lib/karafka/pro/connection/multiplexing/listener.rb +40 -0
- data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
- data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
- data/lib/karafka/pro/iterator.rb +1 -6
- data/lib/karafka/pro/loader.rb +16 -2
- data/lib/karafka/pro/processing/coordinator.rb +2 -1
- data/lib/karafka/pro/processing/executor.rb +37 -0
- data/lib/karafka/pro/processing/expansions_selector.rb +32 -0
- data/lib/karafka/pro/processing/jobs/periodic.rb +41 -0
- data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +32 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +14 -3
- data/lib/karafka/pro/processing/offset_metadata/consumer.rb +44 -0
- data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +131 -0
- data/lib/karafka/pro/processing/offset_metadata/listener.rb +46 -0
- data/lib/karafka/pro/processing/schedulers/base.rb +143 -0
- data/lib/karafka/pro/processing/schedulers/default.rb +107 -0
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/default.rb +136 -3
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +35 -0
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/vp/default.rb +60 -26
- data/lib/karafka/pro/processing/virtual_offset_manager.rb +41 -11
- data/lib/karafka/pro/routing/features/long_running_job/topic.rb +2 -0
- data/lib/karafka/pro/routing/features/multiplexing/config.rb +38 -0
- data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +114 -0
- data/lib/karafka/pro/routing/features/multiplexing/patches/contracts/consumer_group.rb +42 -0
- data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +38 -0
- data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +42 -0
- data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +40 -0
- data/lib/karafka/pro/routing/features/multiplexing.rb +59 -0
- data/lib/karafka/pro/routing/features/non_blocking_job/topic.rb +32 -0
- data/lib/karafka/pro/routing/features/non_blocking_job.rb +37 -0
- data/lib/karafka/pro/routing/features/offset_metadata/config.rb +33 -0
- data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +42 -0
- data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +65 -0
- data/lib/karafka/pro/routing/features/offset_metadata.rb +40 -0
- data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +4 -0
- data/lib/karafka/pro/routing/features/patterns/detector.rb +18 -10
- data/lib/karafka/pro/routing/features/periodic_job/config.rb +37 -0
- data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +44 -0
- data/lib/karafka/pro/routing/features/periodic_job/topic.rb +94 -0
- data/lib/karafka/pro/routing/features/periodic_job.rb +27 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +7 -2
- data/lib/karafka/process.rb +5 -3
- data/lib/karafka/processing/coordinator.rb +5 -1
- data/lib/karafka/processing/executor.rb +43 -13
- data/lib/karafka/processing/executors_buffer.rb +22 -7
- data/lib/karafka/processing/jobs/base.rb +19 -2
- data/lib/karafka/processing/jobs/consume.rb +3 -3
- data/lib/karafka/processing/jobs/idle.rb +5 -0
- data/lib/karafka/processing/jobs/revoked.rb +5 -0
- data/lib/karafka/processing/jobs/shutdown.rb +5 -0
- data/lib/karafka/processing/jobs_queue.rb +19 -8
- data/lib/karafka/processing/schedulers/default.rb +42 -0
- data/lib/karafka/processing/strategies/base.rb +13 -4
- data/lib/karafka/processing/strategies/default.rb +23 -7
- data/lib/karafka/processing/strategies/dlq.rb +36 -0
- data/lib/karafka/processing/worker.rb +4 -1
- data/lib/karafka/routing/builder.rb +12 -2
- data/lib/karafka/routing/consumer_group.rb +5 -5
- data/lib/karafka/routing/features/base.rb +44 -8
- data/lib/karafka/routing/features/dead_letter_queue/config.rb +6 -1
- data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
- data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -2
- data/lib/karafka/routing/proxy.rb +4 -3
- data/lib/karafka/routing/subscription_group.rb +2 -2
- data/lib/karafka/routing/subscription_groups_builder.rb +11 -2
- data/lib/karafka/routing/topic.rb +8 -10
- data/lib/karafka/routing/topics.rb +1 -1
- data/lib/karafka/runner.rb +13 -3
- data/lib/karafka/server.rb +5 -9
- data/lib/karafka/setup/config.rb +21 -1
- data/lib/karafka/status.rb +23 -14
- data/lib/karafka/templates/karafka.rb.erb +7 -0
- data/lib/karafka/time_trackers/partition_usage.rb +56 -0
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +47 -13
- metadata.gz.sig +0 -0
- data/lib/karafka/connection/consumer_group_coordinator.rb +0 -48
- data/lib/karafka/pro/performance_tracker.rb +0 -84
- data/lib/karafka/pro/processing/scheduler.rb +0 -74
- data/lib/karafka/processing/scheduler.rb +0 -38
@@ -11,6 +11,15 @@ module Karafka
|
|
11
11
|
#
|
12
12
|
# @note Executors are not removed after partition is revoked. They are not that big and will
|
13
13
|
# be re-used in case of a re-claim
|
14
|
+
#
|
15
|
+
# @note Since given consumer can run various operations, executor manages that and its
|
16
|
+
# lifecycle. There are following types of operations with appropriate before/after, etc:
|
17
|
+
#
|
18
|
+
# - consume - primary operation related to running user consumption code
|
19
|
+
# - idle - cleanup job that runs on idle runs where no messages would be passed to the end
|
20
|
+
# user. This is used for complex flows with filters, etc
|
21
|
+
# - revoked - runs after the partition was revoked
|
22
|
+
# - shutdown - runs when process is going to shutdown
|
14
23
|
class Executor
|
15
24
|
extend Forwardable
|
16
25
|
|
@@ -39,11 +48,11 @@ module Karafka
|
|
39
48
|
end
|
40
49
|
|
41
50
|
# Allows us to prepare the consumer in the listener thread prior to the job being send to
|
42
|
-
#
|
51
|
+
# be scheduled. It also allows to run some code that is time sensitive and cannot wait in the
|
43
52
|
# queue as it could cause starvation.
|
44
53
|
#
|
45
54
|
# @param messages [Array<Karafka::Messages::Message>]
|
46
|
-
def
|
55
|
+
def before_schedule_consume(messages)
|
47
56
|
# Recreate consumer with each batch if persistence is not enabled
|
48
57
|
# We reload the consumers with each batch instead of relying on some external signals
|
49
58
|
# when needed for consistency. That way devs may have it on or off and not in this
|
@@ -60,7 +69,7 @@ module Karafka
|
|
60
69
|
Time.now
|
61
70
|
)
|
62
71
|
|
63
|
-
consumer.
|
72
|
+
consumer.on_before_schedule_consume
|
64
73
|
end
|
65
74
|
|
66
75
|
# Runs setup and warm-up code in the worker prior to running the consumption
|
@@ -79,23 +88,23 @@ module Karafka
|
|
79
88
|
consumer.on_after_consume
|
80
89
|
end
|
81
90
|
|
91
|
+
# Runs the code needed before idle work is scheduled
|
92
|
+
def before_schedule_idle
|
93
|
+
consumer.on_before_schedule_idle
|
94
|
+
end
|
95
|
+
|
82
96
|
# Runs consumer idle operations
|
83
97
|
# This may include house-keeping or other state management changes that can occur but that
|
84
98
|
# not mean there are any new messages available for the end user to process
|
85
99
|
def idle
|
86
|
-
# Initializes the messages set in case idle operation would happen before any processing
|
87
|
-
# This prevents us from having no messages object at all as the messages object and
|
88
|
-
# its metadata may be used for statistics
|
89
|
-
consumer.messages ||= Messages::Builders::Messages.call(
|
90
|
-
[],
|
91
|
-
topic,
|
92
|
-
partition,
|
93
|
-
Time.now
|
94
|
-
)
|
95
|
-
|
96
100
|
consumer.on_idle
|
97
101
|
end
|
98
102
|
|
103
|
+
# Runs code needed before revoked job is scheduled
|
104
|
+
def before_schedule_revoked
|
105
|
+
consumer.on_before_schedule_revoked if @consumer
|
106
|
+
end
|
107
|
+
|
99
108
|
# Runs the controller `#revoked` method that should be triggered when a given consumer is
|
100
109
|
# no longer needed due to partitions reassignment.
|
101
110
|
#
|
@@ -112,6 +121,11 @@ module Karafka
|
|
112
121
|
consumer.on_revoked if @consumer
|
113
122
|
end
|
114
123
|
|
124
|
+
# Runs code needed before shutdown job is scheduled
|
125
|
+
def before_schedule_shutdown
|
126
|
+
consumer.on_before_schedule_shutdown if @consumer
|
127
|
+
end
|
128
|
+
|
115
129
|
# Runs the controller `#shutdown` method that should be triggered when a given consumer is
|
116
130
|
# no longer needed as we're closing the process.
|
117
131
|
#
|
@@ -146,10 +160,26 @@ module Karafka
|
|
146
160
|
consumer.client = @client
|
147
161
|
consumer.producer = ::Karafka::App.producer
|
148
162
|
consumer.coordinator = @coordinator
|
163
|
+
# Since we have some message-less flows (idle, etc), we initialize consumer with empty
|
164
|
+
# messages set. In production we have persistent consumers, so this is not a performance
|
165
|
+
# overhead as this will happen only once per consumer lifetime
|
166
|
+
consumer.messages = empty_messages
|
149
167
|
|
150
168
|
consumer
|
151
169
|
end
|
152
170
|
end
|
171
|
+
|
172
|
+
# Initializes the messages set in case given operation would happen before any processing
|
173
|
+
# This prevents us from having no messages object at all as the messages object and
|
174
|
+
# its metadata may be used for statistics
|
175
|
+
def empty_messages
|
176
|
+
Messages::Builders::Messages.call(
|
177
|
+
[],
|
178
|
+
topic,
|
179
|
+
partition,
|
180
|
+
Time.now
|
181
|
+
)
|
182
|
+
end
|
153
183
|
end
|
154
184
|
end
|
155
185
|
end
|
@@ -13,6 +13,7 @@ module Karafka
|
|
13
13
|
@client = client
|
14
14
|
# We need two layers here to keep track of topics, partitions and processing groups
|
15
15
|
@buffer = Hash.new { |h, k| h[k] = Hash.new { |h2, k2| h2[k2] = {} } }
|
16
|
+
@executor_class = Karafka::App.config.internal.processing.executor_class
|
16
17
|
end
|
17
18
|
|
18
19
|
# Finds or creates an executor based on the provided details
|
@@ -21,15 +22,28 @@ module Karafka
|
|
21
22
|
# @param partition [Integer] partition number
|
22
23
|
# @param parallel_key [String] parallel group key
|
23
24
|
# @param coordinator [Karafka::Processing::Coordinator]
|
24
|
-
# @return [Executor] consumer executor
|
25
|
+
# @return [Executor, Pro::Processing::Executor] consumer executor
|
25
26
|
def find_or_create(topic, partition, parallel_key, coordinator)
|
26
|
-
@buffer[topic][partition][parallel_key] ||=
|
27
|
+
@buffer[topic][partition][parallel_key] ||= @executor_class.new(
|
27
28
|
@subscription_group.id,
|
28
29
|
@client,
|
29
30
|
coordinator
|
30
31
|
)
|
31
32
|
end
|
32
33
|
|
34
|
+
# Finds all existing executors for given topic partition or creates one for it
|
35
|
+
# @param topic [String] topic name
|
36
|
+
# @param partition [Integer] partition number
|
37
|
+
# @param coordinator [Karafka::Processing::Coordinator]
|
38
|
+
# @return [Array<Executor, Pro::Processing::Executor>]
|
39
|
+
def find_all_or_create(topic, partition, coordinator)
|
40
|
+
existing = find_all(topic, partition)
|
41
|
+
|
42
|
+
return existing unless existing.empty?
|
43
|
+
|
44
|
+
[find_or_create(topic, partition, 0, coordinator)]
|
45
|
+
end
|
46
|
+
|
33
47
|
# Revokes executors of a given topic partition, so they won't be used anymore for incoming
|
34
48
|
# messages
|
35
49
|
#
|
@@ -43,7 +57,8 @@ module Karafka
|
|
43
57
|
#
|
44
58
|
# @param topic [String] topic name
|
45
59
|
# @param partition [Integer] partition number
|
46
|
-
# @return [Array<Executor>] executors in use for this
|
60
|
+
# @return [Array<Executor, Pro::Processing::Executor>] executors in use for this
|
61
|
+
# topic + partition
|
47
62
|
def find_all(topic, partition)
|
48
63
|
@buffer[topic][partition].values
|
49
64
|
end
|
@@ -52,11 +67,11 @@ module Karafka
|
|
52
67
|
# info
|
53
68
|
# @yieldparam [Routing::Topic] karafka routing topic object
|
54
69
|
# @yieldparam [Integer] partition number
|
55
|
-
# @yieldparam [Executor] given executor
|
70
|
+
# @yieldparam [Executor, Pro::Processing::Executor] given executor
|
56
71
|
def each
|
57
|
-
@buffer.
|
58
|
-
partitions.
|
59
|
-
executors.
|
72
|
+
@buffer.each_value do |partitions|
|
73
|
+
partitions.each_value do |executors|
|
74
|
+
executors.each_value do |executor|
|
60
75
|
yield(executor)
|
61
76
|
end
|
62
77
|
end
|
@@ -20,11 +20,14 @@ module Karafka
|
|
20
20
|
# All jobs are blocking by default and they can release the lock when blocking operations
|
21
21
|
# are done (if needed)
|
22
22
|
@non_blocking = false
|
23
|
+
@status = :pending
|
23
24
|
end
|
24
25
|
|
25
|
-
# When redefined can run any code prior to the job being
|
26
|
+
# When redefined can run any code prior to the job being scheduled
|
26
27
|
# @note This will run in the listener thread and not in the worker
|
27
|
-
def
|
28
|
+
def before_schedule
|
29
|
+
raise NotImplementedError, 'Please implement in a subclass'
|
30
|
+
end
|
28
31
|
|
29
32
|
# When redefined can run any code that should run before executing the proper code
|
30
33
|
def before_call; end
|
@@ -49,6 +52,20 @@ module Karafka
|
|
49
52
|
def non_blocking?
|
50
53
|
@non_blocking
|
51
54
|
end
|
55
|
+
|
56
|
+
# @return [Boolean] was this job finished.
|
57
|
+
def finished?
|
58
|
+
@status == :finished
|
59
|
+
end
|
60
|
+
|
61
|
+
# Marks the job as finished. Used by the worker to indicate, that this job is done.
|
62
|
+
#
|
63
|
+
# @note Since the scheduler knows exactly when it schedules jobs and when it keeps them
|
64
|
+
# pending, we do not need advanced state tracking and the only information from the
|
65
|
+
# "outside" is whether it was finished or not after it was scheduled for execution.
|
66
|
+
def finish!
|
67
|
+
@status = :finished
|
68
|
+
end
|
52
69
|
end
|
53
70
|
end
|
54
71
|
end
|
@@ -20,9 +20,9 @@ module Karafka
|
|
20
20
|
end
|
21
21
|
|
22
22
|
# Runs all the preparation code on the executor that needs to happen before the job is
|
23
|
-
#
|
24
|
-
def
|
25
|
-
executor.
|
23
|
+
# scheduled.
|
24
|
+
def before_schedule
|
25
|
+
executor.before_schedule_consume(@messages)
|
26
26
|
end
|
27
27
|
|
28
28
|
# Runs the before consumption preparations on the executor
|
@@ -23,12 +23,7 @@ module Karafka
|
|
23
23
|
# scheduled by Ruby hundreds of thousands of times per group.
|
24
24
|
# We cannot use a single semaphore as it could potentially block in listeners that should
|
25
25
|
# process with their data and also could unlock when a given group needs to remain locked
|
26
|
-
@semaphores =
|
27
|
-
# Ruby prior to 3.2 did not have queue with a timeout on `#pop`, that is why for those
|
28
|
-
# versions we use our custom queue wrapper
|
29
|
-
h.compute_if_absent(k) { RUBY_VERSION < '3.2' ? TimedQueue.new : Queue.new }
|
30
|
-
end
|
31
|
-
|
26
|
+
@semaphores = {}
|
32
27
|
@concurrency = Karafka::App.config.concurrency
|
33
28
|
@tick_interval = ::Karafka::App.config.internal.tick_interval
|
34
29
|
@in_processing = Hash.new { |h, k| h[k] = [] }
|
@@ -37,6 +32,22 @@ module Karafka
|
|
37
32
|
@mutex = Mutex.new
|
38
33
|
end
|
39
34
|
|
35
|
+
# Registers given subscription group id in the queue. It is needed so we do not dynamically
|
36
|
+
# create semaphore, hence avoiding potential race conditions
|
37
|
+
#
|
38
|
+
# @param group_id [String]
|
39
|
+
def register(group_id)
|
40
|
+
# Ruby prior to 3.2 did not have queue with a timeout on `#pop`, that is why for those
|
41
|
+
@mutex.synchronize do
|
42
|
+
# versions we use our custom queue wrapper
|
43
|
+
#
|
44
|
+
# Initializes this semaphore from the mutex, so it is never auto-created
|
45
|
+
# Since we always schedule a job before waiting using semaphores, there won't be any
|
46
|
+
# concurrency problems
|
47
|
+
@semaphores[group_id] = RUBY_VERSION < '3.2' ? TimedQueue.new : Queue.new
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
40
51
|
# Adds the job to the internal main queue, scheduling it for execution in a worker and marks
|
41
52
|
# this job as in processing pipeline.
|
42
53
|
#
|
@@ -79,7 +90,7 @@ module Karafka
|
|
79
90
|
# @param group_id [String] id of the group we want to unlock for one tick
|
80
91
|
# @note This does not release the wait lock. It just causes a conditions recheck
|
81
92
|
def tick(group_id)
|
82
|
-
@semaphores
|
93
|
+
@semaphores.fetch(group_id) << true
|
83
94
|
end
|
84
95
|
|
85
96
|
# Marks a given job from a given group as completed. When there are no more jobs from a given
|
@@ -149,7 +160,7 @@ module Karafka
|
|
149
160
|
while wait?(group_id)
|
150
161
|
yield if block_given?
|
151
162
|
|
152
|
-
@semaphores
|
163
|
+
@semaphores.fetch(group_id).pop(timeout: @tick_interval / 1_000.0)
|
153
164
|
end
|
154
165
|
end
|
155
166
|
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Karafka
|
4
|
+
module Processing
|
5
|
+
# Namespace for Karafka OSS schedulers
|
6
|
+
module Schedulers
|
7
|
+
# FIFO scheduler for messages coming from various topics and partitions
|
8
|
+
class Default
|
9
|
+
# @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
|
10
|
+
def initialize(queue)
|
11
|
+
@queue = queue
|
12
|
+
end
|
13
|
+
|
14
|
+
# Schedules jobs in the fifo order
|
15
|
+
#
|
16
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Consume>] jobs we want to schedule
|
17
|
+
def on_schedule_consumption(jobs_array)
|
18
|
+
jobs_array.each do |job|
|
19
|
+
@queue << job
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# Revocation, shutdown and idle jobs can also run in fifo by default
|
24
|
+
alias on_schedule_revocation on_schedule_consumption
|
25
|
+
alias on_schedule_shutdown on_schedule_consumption
|
26
|
+
alias on_schedule_idle on_schedule_consumption
|
27
|
+
|
28
|
+
# This scheduler does not have anything to manage as it is a pass through and has no state
|
29
|
+
def on_manage
|
30
|
+
nil
|
31
|
+
end
|
32
|
+
|
33
|
+
# This scheduler does not need to be cleared because it is stateless
|
34
|
+
#
|
35
|
+
# @param _group_id [String] Subscription group id
|
36
|
+
def on_clear(_group_id)
|
37
|
+
nil
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
@@ -11,10 +11,19 @@ module Karafka
|
|
11
11
|
module Strategies
|
12
12
|
# Base strategy that should be included in each strategy, just to ensure the API
|
13
13
|
module Base
|
14
|
-
#
|
15
|
-
|
16
|
-
|
17
|
-
|
14
|
+
# Defines all the before schedule handlers for appropriate actions
|
15
|
+
%i[
|
16
|
+
consume
|
17
|
+
idle
|
18
|
+
revoked
|
19
|
+
shutdown
|
20
|
+
].each do |action|
|
21
|
+
class_eval <<~RUBY, __FILE__, __LINE__ + 1
|
22
|
+
def handle_before_schedule_#{action}
|
23
|
+
# What should happen before scheduling this work
|
24
|
+
raise NotImplementedError, 'Implement in a subclass'
|
25
|
+
end
|
26
|
+
RUBY
|
18
27
|
end
|
19
28
|
|
20
29
|
# What should happen before we kick in the processing
|
@@ -13,6 +13,23 @@ module Karafka
|
|
13
13
|
# Apply strategy for a non-feature based flow
|
14
14
|
FEATURES = %i[].freeze
|
15
15
|
|
16
|
+
# By default on all "before schedule" we just run instrumentation, nothing more
|
17
|
+
%i[
|
18
|
+
consume
|
19
|
+
idle
|
20
|
+
revoked
|
21
|
+
shutdown
|
22
|
+
].each do |action|
|
23
|
+
class_eval <<~RUBY, __FILE__, __LINE__ + 1
|
24
|
+
# No actions needed for the standard flow here
|
25
|
+
def handle_before_schedule_#{action}
|
26
|
+
Karafka.monitor.instrument('consumer.before_schedule_#{action}', caller: self)
|
27
|
+
|
28
|
+
nil
|
29
|
+
end
|
30
|
+
RUBY
|
31
|
+
end
|
32
|
+
|
16
33
|
# Marks message as consumed in an async way.
|
17
34
|
#
|
18
35
|
# @param message [Messages::Message] last successfully processed message.
|
@@ -24,6 +41,9 @@ module Karafka
|
|
24
41
|
# already processed but rather at the next one. This applies to both sync and async
|
25
42
|
# versions of this method.
|
26
43
|
def mark_as_consumed(message)
|
44
|
+
# seek offset can be nil only in case `#seek` was invoked with offset reset request
|
45
|
+
# In case like this we ignore marking
|
46
|
+
return true if coordinator.seek_offset.nil?
|
27
47
|
# Ignore earlier offsets than the one we already committed
|
28
48
|
return true if coordinator.seek_offset > message.offset
|
29
49
|
return false if revoked?
|
@@ -40,6 +60,9 @@ module Karafka
|
|
40
60
|
# @return [Boolean] true if we were able to mark the offset, false otherwise.
|
41
61
|
# False indicates that we were not able and that we have lost the partition.
|
42
62
|
def mark_as_consumed!(message)
|
63
|
+
# seek offset can be nil only in case `#seek` was invoked with offset reset request
|
64
|
+
# In case like this we ignore marking
|
65
|
+
return true if coordinator.seek_offset.nil?
|
43
66
|
# Ignore earlier offsets than the one we already committed
|
44
67
|
return true if coordinator.seek_offset > message.offset
|
45
68
|
return false if revoked?
|
@@ -76,13 +99,6 @@ module Karafka
|
|
76
99
|
commit_offsets(async: false)
|
77
100
|
end
|
78
101
|
|
79
|
-
# No actions needed for the standard flow here
|
80
|
-
def handle_before_enqueue
|
81
|
-
Karafka.monitor.instrument('consumer.before_enqueue', caller: self)
|
82
|
-
|
83
|
-
nil
|
84
|
-
end
|
85
|
-
|
86
102
|
# Increment number of attempts
|
87
103
|
def handle_before_consume
|
88
104
|
coordinator.pause_tracker.increment
|
@@ -14,6 +14,42 @@ module Karafka
|
|
14
14
|
dead_letter_queue
|
15
15
|
].freeze
|
16
16
|
|
17
|
+
# Override of the standard `#mark_as_consumed` in order to handle the pause tracker
|
18
|
+
# reset in case DLQ is marked as fully independent. When DLQ is marked independent,
|
19
|
+
# any offset marking causes the pause count tracker to reset. This is useful when
|
20
|
+
# the error is not due to the collective batch operations state but due to intermediate
|
21
|
+
# "crawling" errors that move with it
|
22
|
+
#
|
23
|
+
# @see `Strategies::Default#mark_as_consumed` for more details
|
24
|
+
# @param message [Messages::Message]
|
25
|
+
def mark_as_consumed(message)
|
26
|
+
# If we are not retrying pause count is already 0, no need to try to reset the state
|
27
|
+
return super unless retrying?
|
28
|
+
# If we do not use independent marking on DLQ, we just mark as consumed
|
29
|
+
return super unless topic.dead_letter_queue.independent?
|
30
|
+
# If we were not able to mark no need to reset
|
31
|
+
return false unless super
|
32
|
+
|
33
|
+
coordinator.pause_tracker.reset
|
34
|
+
|
35
|
+
true
|
36
|
+
end
|
37
|
+
|
38
|
+
# Override of the standard `#mark_as_consumed!`. Resets the pause tracker count in case
|
39
|
+
# DLQ was configured with the `independent` flag.
|
40
|
+
#
|
41
|
+
# @see `Strategies::Default#mark_as_consumed!` for more details
|
42
|
+
# @param message [Messages::Message]
|
43
|
+
def mark_as_consumed!(message)
|
44
|
+
return super unless retrying?
|
45
|
+
return super unless topic.dead_letter_queue.independent?
|
46
|
+
return false unless super
|
47
|
+
|
48
|
+
coordinator.pause_tracker.reset
|
49
|
+
|
50
|
+
true
|
51
|
+
end
|
52
|
+
|
17
53
|
# When manual offset management is on, we do not mark anything as consumed automatically
|
18
54
|
# and we rely on the user to figure things out
|
19
55
|
def handle_after_consume
|
@@ -83,7 +83,10 @@ module Karafka
|
|
83
83
|
)
|
84
84
|
ensure
|
85
85
|
# job can be nil when the queue is being closed
|
86
|
-
|
86
|
+
if job
|
87
|
+
@jobs_queue.complete(job)
|
88
|
+
job.finish!
|
89
|
+
end
|
87
90
|
|
88
91
|
# Always publish info, that we completed all the work despite its result
|
89
92
|
Karafka.monitor.instrument('worker.completed', instrument_details)
|
@@ -109,10 +109,20 @@ module Karafka
|
|
109
109
|
# subscription group customization
|
110
110
|
# @param subscription_group_name [String, Symbol] subscription group id. When not provided,
|
111
111
|
# a random uuid will be used
|
112
|
+
# @param args [Array] any extra arguments accepted by the subscription group builder
|
112
113
|
# @param block [Proc] further topics definitions
|
113
|
-
def subscription_group(
|
114
|
+
def subscription_group(
|
115
|
+
subscription_group_name = SubscriptionGroup.id,
|
116
|
+
**args,
|
117
|
+
&block
|
118
|
+
)
|
114
119
|
consumer_group('app') do
|
115
|
-
target.public_send(
|
120
|
+
target.public_send(
|
121
|
+
:subscription_group=,
|
122
|
+
subscription_group_name.to_s,
|
123
|
+
**args,
|
124
|
+
&block
|
125
|
+
)
|
116
126
|
end
|
117
127
|
end
|
118
128
|
|
@@ -14,7 +14,7 @@ module Karafka
|
|
14
14
|
# It allows us to store the "current" subscription group defined in the routing
|
15
15
|
# This subscription group id is then injected into topics, so we can compute the subscription
|
16
16
|
# groups
|
17
|
-
attr_accessor :
|
17
|
+
attr_accessor :current_subscription_group_details
|
18
18
|
|
19
19
|
# @param name [String, Symbol] raw name of this consumer group. Raw means, that it does not
|
20
20
|
# yet have an application client_id namespace, this will be added here by default.
|
@@ -26,7 +26,7 @@ module Karafka
|
|
26
26
|
@topics = Topics.new([])
|
27
27
|
# Initialize the subscription group so there's always a value for it, since even if not
|
28
28
|
# defined directly, a subscription group will be created
|
29
|
-
@
|
29
|
+
@current_subscription_group_details = { name: SubscriptionGroup.id }
|
30
30
|
end
|
31
31
|
|
32
32
|
# @return [Boolean] true if this consumer group should be active in our current process
|
@@ -48,7 +48,7 @@ module Karafka
|
|
48
48
|
built_topic = @topics.last
|
49
49
|
# We overwrite it conditionally in case it was not set by the user inline in the topic
|
50
50
|
# block definition
|
51
|
-
built_topic.
|
51
|
+
built_topic.subscription_group_details ||= current_subscription_group_details
|
52
52
|
built_topic
|
53
53
|
end
|
54
54
|
|
@@ -59,13 +59,13 @@ module Karafka
|
|
59
59
|
def subscription_group=(name = SubscriptionGroup.id, &block)
|
60
60
|
# We cast it here, so the routing supports symbol based but that's anyhow later on
|
61
61
|
# validated as a string
|
62
|
-
@
|
62
|
+
@current_subscription_group_details = { name: name.to_s }
|
63
63
|
|
64
64
|
Proxy.new(self, &block)
|
65
65
|
|
66
66
|
# We need to reset the current subscription group after it is used, so it won't leak
|
67
67
|
# outside to other topics that would be defined without a defined subscription group
|
68
|
-
@
|
68
|
+
@current_subscription_group_details = { name: SubscriptionGroup.id }
|
69
69
|
end
|
70
70
|
|
71
71
|
# @return [Array<Routing::SubscriptionGroup>] all the subscription groups build based on
|
@@ -13,17 +13,46 @@ module Karafka
|
|
13
13
|
class << self
|
14
14
|
# Extends topic and builder with given feature API
|
15
15
|
def activate
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
16
|
+
if const_defined?('Topic', false)
|
17
|
+
Topic.prepend(self::Topic)
|
18
|
+
end
|
19
|
+
|
20
|
+
if const_defined?('Topics', false)
|
21
|
+
Topics.prepend(self::Topics)
|
22
|
+
end
|
23
|
+
|
24
|
+
if const_defined?('ConsumerGroup', false)
|
25
|
+
ConsumerGroup.prepend(self::ConsumerGroup)
|
26
|
+
end
|
27
|
+
|
28
|
+
if const_defined?('Proxy', false)
|
29
|
+
Proxy.prepend(self::Proxy)
|
30
|
+
end
|
31
|
+
|
32
|
+
if const_defined?('Builder', false)
|
33
|
+
Builder.prepend(self::Builder)
|
34
|
+
end
|
35
|
+
|
36
|
+
if const_defined?('Contracts', false)
|
37
|
+
Builder.prepend(Base::Expander.new(self))
|
38
|
+
end
|
39
|
+
|
40
|
+
if const_defined?('SubscriptionGroup', false)
|
41
|
+
SubscriptionGroup.prepend(self::SubscriptionGroup)
|
42
|
+
end
|
43
|
+
|
44
|
+
if const_defined?('SubscriptionGroupsBuilder', false)
|
45
|
+
SubscriptionGroupsBuilder.prepend(self::SubscriptionGroupsBuilder)
|
46
|
+
end
|
22
47
|
end
|
23
48
|
|
24
|
-
# Loads all the features and activates them
|
49
|
+
# Loads all the features and activates them once
|
25
50
|
def load_all
|
51
|
+
return if @loaded
|
52
|
+
|
26
53
|
features.each(&:activate)
|
54
|
+
|
55
|
+
@loaded = true
|
27
56
|
end
|
28
57
|
|
29
58
|
# @param config [Karafka::Core::Configurable::Node] app config that we can alter with
|
@@ -41,11 +70,18 @@ module Karafka
|
|
41
70
|
|
42
71
|
private
|
43
72
|
|
44
|
-
# @return [Array<Class>] all available routing features
|
73
|
+
# @return [Array<Class>] all available routing features that are direct descendants of
|
74
|
+
# the features base.Approach with using `#superclass` prevents us from accidentally
|
75
|
+
# loading Pro components
|
45
76
|
def features
|
46
77
|
ObjectSpace
|
47
78
|
.each_object(Class)
|
48
79
|
.select { |klass| klass < self }
|
80
|
+
# Ensures, that Pro components are only loaded when we operate in Pro mode. Since
|
81
|
+
# outside of specs Zeitwerk does not require them at all, they will not be loaded
|
82
|
+
# anyhow, but for specs this needs to be done as RSpec requires all files to be
|
83
|
+
# present
|
84
|
+
.reject { |klass| Karafka.pro? ? false : klass.superclass != self }
|
49
85
|
.sort_by(&:to_s)
|
50
86
|
end
|
51
87
|
|
@@ -11,8 +11,13 @@ module Karafka
|
|
11
11
|
:max_retries,
|
12
12
|
# To what topic the skipped messages should be moved
|
13
13
|
:topic,
|
14
|
+
# Should retries be handled collectively on a batch or independently per message
|
15
|
+
:independent,
|
14
16
|
keyword_init: true
|
15
|
-
)
|
17
|
+
) do
|
18
|
+
alias_method :active?, :active
|
19
|
+
alias_method :independent?, :independent
|
20
|
+
end
|
16
21
|
end
|
17
22
|
end
|
18
23
|
end
|