karafka 2.2.12 → 2.2.14
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +141 -121
- data/Gemfile.lock +10 -10
- data/config/locales/errors.yml +2 -1
- data/docker-compose.yml +2 -0
- data/lib/karafka/admin.rb +109 -3
- data/lib/karafka/app.rb +7 -0
- data/lib/karafka/base_consumer.rb +23 -30
- data/lib/karafka/connection/client.rb +13 -10
- data/lib/karafka/connection/consumer_group_coordinator.rb +3 -3
- data/lib/karafka/connection/listener.rb +18 -10
- data/lib/karafka/connection/listeners_batch.rb +6 -1
- data/lib/karafka/contracts/config.rb +2 -1
- data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
- data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
- data/lib/karafka/instrumentation/logger_listener.rb +0 -9
- data/lib/karafka/instrumentation/notifications.rb +6 -3
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +2 -2
- data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
- data/lib/karafka/pro/loader.rb +3 -2
- data/lib/karafka/pro/processing/coordinator.rb +12 -6
- data/lib/karafka/pro/processing/jobs_queue.rb +109 -0
- data/lib/karafka/pro/processing/schedulers/base.rb +127 -0
- data/lib/karafka/pro/processing/schedulers/default.rb +109 -0
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/default.rb +3 -1
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +10 -1
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/vp/default.rb +9 -5
- data/lib/karafka/processing/coordinator.rb +13 -7
- data/lib/karafka/processing/executor.rb +27 -3
- data/lib/karafka/processing/executors_buffer.rb +3 -3
- data/lib/karafka/processing/jobs/base.rb +19 -2
- data/lib/karafka/processing/jobs/consume.rb +3 -3
- data/lib/karafka/processing/jobs/idle.rb +5 -0
- data/lib/karafka/processing/jobs/revoked.rb +5 -0
- data/lib/karafka/processing/jobs/shutdown.rb +5 -0
- data/lib/karafka/processing/jobs_queue.rb +45 -17
- data/lib/karafka/processing/schedulers/default.rb +41 -0
- data/lib/karafka/processing/strategies/base.rb +13 -4
- data/lib/karafka/processing/strategies/default.rb +17 -5
- data/lib/karafka/processing/worker.rb +4 -1
- data/lib/karafka/routing/builder.rb +32 -17
- data/lib/karafka/routing/proxy.rb +4 -3
- data/lib/karafka/routing/subscription_group.rb +11 -6
- data/lib/karafka/routing/topics.rb +1 -1
- data/lib/karafka/runner.rb +1 -1
- data/lib/karafka/setup/config.rb +5 -1
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +0 -1
- data.tar.gz.sig +0 -0
- metadata +8 -5
- metadata.gz.sig +0 -0
- data/lib/karafka/pro/performance_tracker.rb +0 -84
- data/lib/karafka/pro/processing/scheduler.rb +0 -75
- data/lib/karafka/processing/scheduler.rb +0 -22
data/lib/karafka/admin.rb
CHANGED
@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 
 module Karafka
-  #
+  # Admin actions that we can perform via Karafka on our Kafka cluster
   #
   # @note It always initializes a new admin instance as we want to ensure it is always closed
   #   Since admin actions are not performed that often, that should be ok.
@@ -137,6 +137,109 @@ module Karafka
      end
    end
 
+    # Moves the offset on a given consumer group and provided topic to the requested location
+    #
+    # @param consumer_group_id [String] id of the consumer group for which we want to move the
+    #   existing offset
+    # @param topics_with_partitions_and_offsets [Hash] Hash with list of topics and settings to
+    #   where to move given consumer. It allows us to move particular partitions or whole topics
+    #   if we want to reset all partitions to for example a point in time.
+    #
+    # @note This method should **not** be executed on a running consumer group as it creates a
+    #   "fake" consumer and uses it to move offsets.
+    #
+    # @example Move a single topic partition nr 1 offset to 100
+    #   Karafka::Admin.seek_consumer_group('group-id', { 'topic' => { 1 => 100 } })
+    #
+    # @example Move offsets on all partitions of a topic to 100
+    #   Karafka::Admin.seek_consumer_group('group-id', { 'topic' => 100 })
+    #
+    # @example Move offset to 5 seconds ago on partition 2
+    #   Karafka::Admin.seek_consumer_group('group-id', { 'topic' => { 2 => 5.seconds.ago } })
+    def seek_consumer_group(consumer_group_id, topics_with_partitions_and_offsets)
+      tpl_base = {}
+
+      # Normalize the data so we always have all partitions and topics in the same format
+      # That is in a format where we have topics and all partitions with their per partition
+      # assigned offsets
+      topics_with_partitions_and_offsets.each do |topic, partitions_with_offsets|
+        tpl_base[topic] = {}
+
+        if partitions_with_offsets.is_a?(Hash)
+          tpl_base[topic] = partitions_with_offsets
+        else
+          topic(topic)[:partition_count].times do |partition|
+            tpl_base[topic][partition] = partitions_with_offsets
+          end
+        end
+      end
+
+      tpl = Rdkafka::Consumer::TopicPartitionList.new
+      # In case of time based location, we need to to a pre-resolution, that's why we keep it
+      # separately
+      time_tpl = Rdkafka::Consumer::TopicPartitionList.new
+
+      # Distribute properly the offset type
+      tpl_base.each do |topic, partitions_with_offsets|
+        partitions_with_offsets.each do |partition, offset|
+          target = offset.is_a?(Time) ? time_tpl : tpl
+          target.add_topic_and_partitions_with_offsets(topic, [[partition, offset]])
+        end
+      end
+
+      # We set this that way so we can impersonate this consumer group and seek where we want
+      mapped_consumer_group_id = app_config.consumer_mapper.call(consumer_group_id)
+      settings = { 'group.id': mapped_consumer_group_id }
+
+      with_consumer(settings) do |consumer|
+        # If we have any time based stuff to resolve, we need to do it prior to commits
+        unless time_tpl.empty?
+          real_offsets = consumer.offsets_for_times(time_tpl)
+
+          real_offsets.to_h.each do |name, results|
+            results.each do |result|
+              raise(Errors::InvalidTimeBasedOffsetError) unless result
+
+              partition = result.partition
+
+              # Negative offset means we're beyond last message and we need to query for the
+              # high watermark offset to get the most recent offset and move there
+              if result.offset.negative?
+                _, offset = consumer.query_watermark_offsets(name, result.partition)
+              else
+                # If we get an offset, it means there existed a message close to this time
+                # location
+                offset = result.offset
+              end
+
+              # Since now we have proper offsets, we can add this to the final tpl for commit
+              tpl.add_topic_and_partitions_with_offsets(name, [[partition, offset]])
+            end
+          end
+        end
+
+        consumer.commit(tpl, false)
+      end
+    end
+
+    # Removes given consumer group (if exists)
+    #
+    # @param consumer_group_id [String] consumer group name without the mapper name (if any used)
+    #
+    # @note Please note, Karafka will apply the consumer group mapper on the provided consumer
+    #   group.
+    #
+    # @note This method should not be used on a running consumer group as it will not yield any
+    #   results.
+    def delete_consumer_group(consumer_group_id)
+      mapped_consumer_group_id = app_config.consumer_mapper.call(consumer_group_id)
+
+      with_admin do |admin|
+        handler = admin.delete_group(mapped_consumer_group_id)
+        handler.wait(max_wait_timeout: app_config.admin.max_wait_time)
+      end
+    end
+
     # Fetches the watermark offsets for a given topic partition
     #
     # @param name [String, Symbol] topic name
@@ -264,7 +367,7 @@ module Karafka
      # @param settings [Hash] extra settings for config (if needed)
      # @return [::Rdkafka::Config] rdkafka config
      def config(type, settings)
-
+        mapped_admin_group_id = app_config.consumer_mapper.call(
          app_config.admin.group_id
        )
 
@@ -272,8 +375,11 @@ module Karafka
          .kafka
          .then(&:dup)
          .merge(app_config.admin.kafka)
+          .tap { |config| config[:'group.id'] = mapped_admin_group_id }
+          # We merge after setting the group id so it can be altered if needed
+          # In general in admin we only should alter it when we need to impersonate a given
+          # consumer group or do something similar
          .merge!(settings)
-          .tap { |config| config[:'group.id'] = group_id }
          .then { |config| Karafka::Setup::AttributesMap.public_send(type, config) }
          .then { |config| ::Rdkafka::Config.new(config) }
      end
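For reference, the new admin calls added above can be exercised from a console or a maintenance script. A minimal sketch, assuming a topic named 'events' and a consumer group 'example_app' that is not currently consuming (both names are illustrative):

# Move partition 0 of 'events' to offset 100 and partition 1 to roughly five minutes ago
Karafka::Admin.seek_consumer_group(
  'example_app',
  { 'events' => { 0 => 100, 1 => Time.now - 300 } }
)

# Reset every partition of 'events' to offset 0
Karafka::Admin.seek_consumer_group('example_app', { 'events' => 0 })

# Remove the (stopped) consumer group entirely
Karafka::Admin.delete_consumer_group('example_app')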
data/lib/karafka/app.rb
CHANGED
@@ -36,6 +36,13 @@ module Karafka
     # Just a nicer name for the consumer groups
     alias routes consumer_groups
 
+    # Returns current assignments of this process. Both topics and partitions
+    #
+    # @return [Hash<Karafka::Routing::Topic, Array<Integer>>]
+    def assignments
+      Instrumentation::AssignmentsTracker.instance.current
+    end
+
     # Allow for easier status management via `Karafka::App` by aliasing status methods here
     Status::STATES.each do |state, transition|
       class_eval <<~RUBY, __FILE__, __LINE__ + 1
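The new Karafka::App.assignments reader shown above returns routing topics mapped to frozen arrays of partition numbers, which makes simple introspection possible. A minimal sketch (the logging target is illustrative):

# Print the partitions this process currently owns, per topic
Karafka::App.assignments.each do |topic, partitions|
  puts "#{topic.name}: assigned partitions #{partitions.join(', ')}"
end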
data/lib/karafka/base_consumer.rb
CHANGED

@@ -34,16 +34,9 @@ module Karafka
     # @note This should not be used by the end users as it is part of the lifecycle of things and
     #   not as a part of the public api. This should not perform any extensive operations as it is
     #   blocking and running in the listener thread.
-    def
+    def on_before_schedule_consume
       @used = true
-
-    rescue StandardError => e
-      Karafka.monitor.instrument(
-        'error.occurred',
-        error: e,
-        caller: self,
-        type: 'consumer.before_enqueue.error'
-      )
+      handle_before_schedule_consume
     end
 
     # Can be used to run preparation code in the worker
@@ -59,13 +52,6 @@ module Karafka
       # We run this after the full metadata setup, so we can use all the messages information
       # if needed
       handle_before_consume
-    rescue StandardError => e
-      Karafka.monitor.instrument(
-        'error.occurred',
-        error: e,
-        caller: self,
-        type: 'consumer.before_consume.error'
-      )
     end
 
     # Executes the default consumer flow.
@@ -94,13 +80,13 @@ module Karafka
     #   not as part of the public api.
     def on_after_consume
       handle_after_consume
-
-
-
-
-
-
-
+    end
+
+    # Can be used to run code prior to scheduling of idle execution
+    #
+    # @private
+    def on_before_schedule_idle
+      handle_before_schedule_idle
     end
 
     # Trigger method for running on idle runs without messages
@@ -108,13 +94,13 @@ module Karafka
     # @private
     def on_idle
       handle_idle
-
-
-
-
-
-
-
+    end
+
+    # Can be used to run code prior to scheduling of revoked execution
+    #
+    # @private
+    def on_before_schedule_revoked
+      handle_before_schedule_revoked
     end
 
     # Trigger method for running on partition revocation.
@@ -131,6 +117,13 @@ module Karafka
       )
     end
 
+    # Can be used to run code prior to scheduling of revoked execution
+    #
+    # @private
+    def on_before_schedule_shutdown
+      handle_before_schedule_shutdown
+    end
+
     # Trigger method for running on shutdown.
     #
     # @private
data/lib/karafka/connection/client.rb
CHANGED

@@ -45,10 +45,7 @@ module Karafka
       @buffer = RawMessagesBuffer.new
       @tick_interval = ::Karafka::App.config.internal.tick_interval
       @rebalance_manager = RebalanceManager.new(@subscription_group.id)
-      @rebalance_callback = Instrumentation::Callbacks::Rebalance.new(
-        @subscription_group.id,
-        @subscription_group.consumer_group.id
-      )
+      @rebalance_callback = Instrumentation::Callbacks::Rebalance.new(@subscription_group)
       @events_poller = Helpers::IntervalRunner.new { events_poll }
       @kafka = build_consumer
       # There are few operations that can happen in parallel from the listener threads as well
@@ -309,12 +306,18 @@ module Karafka
 
     # Closes and resets the client completely.
     def reset
-
-
-
-
-
-
+      Karafka.monitor.instrument(
+        'client.reset',
+        caller: self,
+        subscription_group: @subscription_group
+      ) do
+        close
+
+        @events_poller.reset
+        @closed = false
+        @paused_tpls.clear
+        @kafka = build_consumer
+      end
     end
 
     # Runs a single poll on the main queue and consumer queue ignoring all the potential errors
data/lib/karafka/connection/consumer_group_coordinator.rb
CHANGED

@@ -16,7 +16,7 @@ module Karafka
     class ConsumerGroupCoordinator
       # @param group_size [Integer] number of separate subscription groups in a consumer group
       def initialize(group_size)
-        @
+        @shutdown_mutex = Mutex.new
         @group_size = group_size
         @finished = Set.new
       end
@@ -30,12 +30,12 @@ module Karafka
       # @return [Boolean] can we start shutdown on a given listener
       # @note If true, will also obtain a lock so no-one else will be closing the same time we do
       def shutdown?
-        finished? && @
+        finished? && @shutdown_mutex.try_lock
       end
 
       # Unlocks the shutdown lock
       def unlock
-        @
+        @shutdown_mutex.unlock if @shutdown_mutex.owned?
       end
 
       # Marks given listener as finished
data/lib/karafka/connection/listener.rb
CHANGED

@@ -23,8 +23,9 @@ module Karafka
       # @param consumer_group_coordinator [Karafka::Connection::ConsumerGroupCoordinator]
       # @param subscription_group [Karafka::Routing::SubscriptionGroup]
       # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
+      # @param scheduler [Karafka::Processing::Scheduler] scheduler we want to use
       # @return [Karafka::Connection::Listener] listener instance
-      def initialize(consumer_group_coordinator, subscription_group, jobs_queue)
+      def initialize(consumer_group_coordinator, subscription_group, jobs_queue, scheduler)
         proc_config = ::Karafka::App.config.internal.processing
 
         @id = SecureRandom.hex(6)
@@ -36,8 +37,7 @@ module Karafka
         @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
         @jobs_builder = proc_config.jobs_builder
         @partitioner = proc_config.partitioner_class.new(subscription_group)
-
-        @scheduler = proc_config.scheduler
+        @scheduler = scheduler
         @events_poller = Helpers::IntervalRunner.new { @client.events_poll }
         # We keep one buffer for messages to preserve memory and not allocate extra objects
         # We can do this that way because we always first schedule jobs using messages before we
@@ -45,6 +45,8 @@ module Karafka
         @messages_buffer = MessagesBuffer.new(subscription_group)
         @mutex = Mutex.new
         @stopped = false
+
+        @jobs_queue.register(@subscription_group.id)
       end
 
       # Runs the main listener fetch loop.
@@ -230,7 +232,7 @@ module Karafka
         # here. In cases like this, we do not run a revocation job
         @executors.find_all(topic, partition).each do |executor|
           job = @jobs_builder.revoked(executor)
-          job.
+          job.before_schedule
           jobs << job
         end
 
@@ -243,7 +245,7 @@ module Karafka
           end
         end
 
-        @scheduler.
+        @scheduler.on_schedule_revocation(jobs)
       end
 
       # Enqueues the shutdown jobs for all the executors that exist in our subscription group
@@ -252,11 +254,11 @@ module Karafka
 
         @executors.each do |executor|
           job = @jobs_builder.shutdown(executor)
-          job.
+          job.before_schedule
           jobs << job
         end
 
-        @scheduler.
+        @scheduler.on_schedule_shutdown(jobs)
       end
 
       # Polls messages within the time and amount boundaries defined in the settings and then
@@ -296,14 +298,17 @@ module Karafka
           end
         end
 
-        jobs.each(&:
+        jobs.each(&:before_schedule)
 
-        @scheduler.
+        @scheduler.on_schedule_consumption(jobs)
       end
 
       # Waits for all the jobs from a given subscription group to finish before moving forward
       def wait
-        @jobs_queue.wait(@subscription_group.id)
+        @jobs_queue.wait(@subscription_group.id) do
+          @events_poller.call
+          @scheduler.on_manage
+        end
       end
 
       # Waits without blocking the polling
@@ -319,6 +324,8 @@ module Karafka
       def wait_pinging(wait_until:, after_ping: -> {})
         until wait_until.call
           @client.ping
+          @scheduler.on_manage
+
           after_ping.call
           sleep(0.2)
         end
@@ -334,6 +341,7 @@ module Karafka
         # resetting.
         @jobs_queue.wait(@subscription_group.id)
         @jobs_queue.clear(@subscription_group.id)
+        @scheduler.on_clear(@subscription_group.id)
         @events_poller.reset
         @client.reset
         @coordinators.reset
data/lib/karafka/connection/listeners_batch.rb
CHANGED

@@ -11,6 +11,10 @@ module Karafka
       # @param jobs_queue [JobsQueue]
       # @return [ListenersBatch]
       def initialize(jobs_queue)
+        # We need one scheduler for all the listeners because in case of complex schedulers, they
+        # should be able to distribute work whenever any work is done in any of the listeners
+        scheduler = App.config.internal.processing.scheduler_class.new(jobs_queue)
+
         @coordinators = []
 
         @batch = App.subscription_groups.flat_map do |_consumer_group, subscription_groups|
@@ -24,7 +28,8 @@ module Karafka
           Connection::Listener.new(
             consumer_group_coordinator,
             subscription_group,
-            jobs_queue
+            jobs_queue,
+            scheduler
           )
         end
       end
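All listeners now share a single scheduler instance built from the internal scheduler_class setting. Based only on the calls visible in this diff (on_schedule_consumption, on_schedule_revocation, on_schedule_shutdown, on_manage and on_clear, with the jobs queue passed to the constructor), a custom scheduler could look roughly like the sketch below. MyFifoScheduler is a hypothetical name and the FIFO behaviour is an assumption for illustration, not the gem's actual default implementation:

# Sketch of a scheduler responding to the interface the listener calls in this diff
class MyFifoScheduler
  # @param queue [Karafka::Processing::JobsQueue] queue built by the listeners batch
  def initialize(queue)
    @queue = queue
  end

  # Push consumption jobs in the order they were built (simple FIFO)
  def on_schedule_consumption(jobs)
    jobs.each { |job| @queue << job }
  end

  # In this sketch revocation and shutdown jobs are scheduled the same way
  alias on_schedule_revocation on_schedule_consumption
  alias on_schedule_shutdown on_schedule_consumption

  # Invoked periodically while the listener waits; a plain FIFO has nothing to manage
  def on_manage; end

  # Invoked when a subscription group is cleared; nothing to clean up here
  def on_clear(_subscription_group_id); end
end

Such a class would presumably be plugged in during setup via config.internal.processing.scheduler_class = MyFifoScheduler, which matches the scheduler_class setting validated by the configuration contract in the next section.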
data/lib/karafka/contracts/config.rb
CHANGED

@@ -73,7 +73,8 @@ module Karafka
 
       nested(:processing) do
         required(:jobs_builder) { |val| !val.nil? }
-        required(:
+        required(:jobs_queue_class) { |val| !val.nil? }
+        required(:scheduler_class) { |val| !val.nil? }
         required(:coordinator_class) { |val| !val.nil? }
         required(:partitioner_class) { |val| !val.nil? }
         required(:strategy_selector) { |val| !val.nil? }
data/lib/karafka/instrumentation/assignments_tracker.rb
ADDED

@@ -0,0 +1,96 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Instrumentation
+    # Keeps track of active assignments and materializes them by returning the routing topics
+    # with appropriate partitions that are assigned at a given moment
+    #
+    # It is auto-subscribed as part of Karafka itself.
+    #
+    # It is not heavy from the computational point of view, as it only operates during rebalances.
+    #
+    # We keep assignments as flat topics structure because we can go from topics to both
+    # subscription and consumer groups if needed.
+    class AssignmentsTracker
+      include Singleton
+
+      def initialize
+        @mutex = Mutex.new
+        @assignments = Hash.new { |hash, key| hash[key] = [] }
+      end
+
+      # Returns all the active/current assignments of this given process
+      #
+      # @return [Hash<Karafka::Routing::Topic, Array<Integer>>]
+      #
+      # @note Keep in mind, that those assignments can change any time, especially when working
+      #   with multiple consumer groups or subscription groups.
+      #
+      # @note We return a copy because we modify internals and we do not want user to tamper with
+      #   the data accidentally
+      def current
+        assignments = {}
+
+        @assignments.each do |topic, partitions|
+          assignments[topic] = partitions.dup.freeze
+        end
+
+        assignments.freeze
+      end
+
+      # Clears all the assignments
+      def clear
+        @mutex.synchronize do
+          @assignments.clear
+        end
+      end
+
+      # When client is under reset due to critical issues, remove all of its assignments as we will
+      # get a new set of assignments
+      # @param event [Karafka::Core::Monitoring::Event]
+      def on_client_reset(event)
+        sg = event[:subscription_group]
+
+        @mutex.synchronize do
+          @assignments.delete_if do |topic, _partitions|
+            topic.subscription_group.id == sg.id
+          end
+        end
+      end
+
+      # Removes partitions from the current assignments hash
+      #
+      # @param event [Karafka::Core::Monitoring::Event]
+      def on_rebalance_partitions_revoked(event)
+        sg = event[:subscription_group]
+
+        @mutex.synchronize do
+          event[:tpl].to_h.each do |topic, partitions|
+            topic = sg.topics.find(topic)
+
+            @assignments[topic] -= partitions.map(&:partition)
+            @assignments[topic].sort!
+            # Remove completely topics for which we do not have any assignments left
+            @assignments.delete_if { |_topic, cur_partitions| cur_partitions.empty? }
+          end
+        end
+      end
+
+      # Adds partitions to the current assignments hash
+      #
+      # @param event [Karafka::Core::Monitoring::Event]
+      def on_rebalance_partitions_assigned(event)
+        sg = event[:subscription_group]
+
+        @mutex.synchronize do
+          event[:tpl].to_h.each do |topic, partitions|
+            topic = sg.topics.find(topic)
+
+            @assignments[topic] += partitions.map(&:partition)
+            @assignments[topic].sort!
+          end
+        end
+      end
+    end
+  end
+end
data/lib/karafka/instrumentation/callbacks/rebalance.rb
CHANGED

@@ -6,11 +6,10 @@ module Karafka
       # Callback that connects to the librdkafka rebalance callback and converts those events into
       # our internal events
       class Rebalance
-        # @param
-        #
-        def initialize(
-          @
-          @consumer_group_id = consumer_group_id
+        # @param subscription_group [Karafka::Routes::SubscriptionGroup] subscription group for
+        #   which we want to manage rebalances
+        def initialize(subscription_group)
+          @subscription_group = subscription_group
         end
 
         # Publishes an event that partitions are going to be revoked.
@@ -53,8 +52,12 @@ module Karafka
           ::Karafka.monitor.instrument(
             "rebalance.#{name}",
             caller: self,
-
-
+            # We keep the id references here for backwards compatibility as some of the monitors
+            # may use the id references
+            subscription_group_id: @subscription_group.id,
+            subscription_group: @subscription_group,
+            consumer_group_id: @subscription_group.consumer_group.id,
+            consumer_group: @subscription_group.consumer_group,
             tpl: tpl
           )
         end
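Since the rebalance events now carry the full subscription group and consumer group objects (with the id keys retained for backwards compatibility), a monitor subscriber can read them directly. A minimal sketch, assuming the standard Karafka.monitor.subscribe API and the payload keys published above:

# Log which partitions this process just received during a rebalance
Karafka.monitor.subscribe('rebalance.partitions_assigned') do |event|
  sg = event[:subscription_group]
  tpl = event[:tpl]

  puts "Rebalance in #{event[:consumer_group_id]} (subscription group #{sg.id})"

  tpl.to_h.each do |topic, partitions|
    puts "  #{topic}: +#{partitions.map(&:partition).join(', ')}"
  end
end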
data/lib/karafka/instrumentation/logger_listener.rb
CHANGED

@@ -241,15 +241,6 @@ module Karafka
       when 'consumer.revoked.error'
         error "Consumer on revoked failed due to an error: #{error}"
         error details
-      when 'consumer.before_enqueue.error'
-        error "Consumer before enqueue failed due to an error: #{error}"
-        error details
-      when 'consumer.before_consume.error'
-        error "Consumer before consume failed due to an error: #{error}"
-        error details
-      when 'consumer.after_consume.error'
-        error "Consumer after consume failed due to an error: #{error}"
-        error details
       when 'consumer.idle.error'
         error "Consumer idle failed due to an error: #{error}"
         error details
data/lib/karafka/instrumentation/notifications.rb
CHANGED

@@ -20,6 +20,7 @@ module Karafka
        active_job.consume
        active_job.consumed
 
+        app.initializing
        app.initialized
        app.running
        app.quieting
@@ -30,26 +31,28 @@ module Karafka
 
        client.pause
        client.resume
+        client.reset
 
        connection.listener.before_fetch_loop
        connection.listener.fetch_loop
        connection.listener.fetch_loop.received
 
-        connection.client.poll.error
-        connection.client.unsubscribe.error
-
        rebalance.partitions_assign
        rebalance.partitions_assigned
        rebalance.partitions_revoke
        rebalance.partitions_revoked
 
+        consumer.before_schedule_consume
        consumer.consume
        consumer.consumed
        consumer.consuming.pause
        consumer.consuming.retry
+        consumer.before_schedule_idle
        consumer.idle
+        consumer.before_schedule_revoked
        consumer.revoke
        consumer.revoked
+        consumer.before_schedule_shutdown
        consumer.shutting_down
        consumer.shutdown
 
data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb
CHANGED

@@ -94,8 +94,8 @@ module Karafka
         error "Consumer consuming error: #{error}"
       when 'consumer.revoked.error'
         error "Consumer on revoked failed due to an error: #{error}"
-      when 'consumer.
-        error "Consumer before
+      when 'consumer.before_schedule.error'
+        error "Consumer before schedule failed due to an error: #{error}"
       when 'consumer.before_consume.error'
         error "Consumer before consume failed due to an error: #{error}"
       when 'consumer.after_consume.error'