karafka 2.2.12 → 2.2.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +141 -121
- data/Gemfile.lock +10 -10
- data/config/locales/errors.yml +2 -1
- data/docker-compose.yml +2 -0
- data/lib/karafka/admin.rb +109 -3
- data/lib/karafka/app.rb +7 -0
- data/lib/karafka/base_consumer.rb +23 -30
- data/lib/karafka/connection/client.rb +13 -10
- data/lib/karafka/connection/consumer_group_coordinator.rb +3 -3
- data/lib/karafka/connection/listener.rb +18 -10
- data/lib/karafka/connection/listeners_batch.rb +6 -1
- data/lib/karafka/contracts/config.rb +2 -1
- data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
- data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
- data/lib/karafka/instrumentation/logger_listener.rb +0 -9
- data/lib/karafka/instrumentation/notifications.rb +6 -3
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +2 -2
- data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
- data/lib/karafka/pro/loader.rb +3 -2
- data/lib/karafka/pro/processing/coordinator.rb +12 -6
- data/lib/karafka/pro/processing/jobs_queue.rb +109 -0
- data/lib/karafka/pro/processing/schedulers/base.rb +127 -0
- data/lib/karafka/pro/processing/schedulers/default.rb +109 -0
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/default.rb +3 -1
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +10 -1
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/vp/default.rb +9 -5
- data/lib/karafka/processing/coordinator.rb +13 -7
- data/lib/karafka/processing/executor.rb +27 -3
- data/lib/karafka/processing/executors_buffer.rb +3 -3
- data/lib/karafka/processing/jobs/base.rb +19 -2
- data/lib/karafka/processing/jobs/consume.rb +3 -3
- data/lib/karafka/processing/jobs/idle.rb +5 -0
- data/lib/karafka/processing/jobs/revoked.rb +5 -0
- data/lib/karafka/processing/jobs/shutdown.rb +5 -0
- data/lib/karafka/processing/jobs_queue.rb +45 -17
- data/lib/karafka/processing/schedulers/default.rb +41 -0
- data/lib/karafka/processing/strategies/base.rb +13 -4
- data/lib/karafka/processing/strategies/default.rb +17 -5
- data/lib/karafka/processing/worker.rb +4 -1
- data/lib/karafka/routing/builder.rb +32 -17
- data/lib/karafka/routing/proxy.rb +4 -3
- data/lib/karafka/routing/subscription_group.rb +11 -6
- data/lib/karafka/routing/topics.rb +1 -1
- data/lib/karafka/runner.rb +1 -1
- data/lib/karafka/setup/config.rb +5 -1
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +0 -1
- data.tar.gz.sig +0 -0
- metadata +8 -5
- metadata.gz.sig +0 -0
- data/lib/karafka/pro/performance_tracker.rb +0 -84
- data/lib/karafka/pro/processing/scheduler.rb +0 -75
- data/lib/karafka/processing/scheduler.rb +0 -22
data/lib/karafka/admin.rb
CHANGED
@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 
 module Karafka
-  #
+  # Admin actions that we can perform via Karafka on our Kafka cluster
   #
   # @note It always initializes a new admin instance as we want to ensure it is always closed
   #   Since admin actions are not performed that often, that should be ok.
@@ -137,6 +137,109 @@ module Karafka
         end
       end
 
+      # Moves the offset on a given consumer group and provided topic to the requested location
+      #
+      # @param consumer_group_id [String] id of the consumer group for which we want to move the
+      #   existing offset
+      # @param topics_with_partitions_and_offsets [Hash] Hash with list of topics and settings to
+      #   where to move given consumer. It allows us to move particular partitions or whole topics
+      #   if we want to reset all partitions to for example a point in time.
+      #
+      # @note This method should **not** be executed on a running consumer group as it creates a
+      #   "fake" consumer and uses it to move offsets.
+      #
+      # @example Move a single topic partition nr 1 offset to 100
+      #   Karafka::Admin.seek_consumer_group('group-id', { 'topic' => { 1 => 100 } })
+      #
+      # @example Move offsets on all partitions of a topic to 100
+      #   Karafka::Admin.seek_consumer_group('group-id', { 'topic' => 100 })
+      #
+      # @example Move offset to 5 seconds ago on partition 2
+      #   Karafka::Admin.seek_consumer_group('group-id', { 'topic' => { 2 => 5.seconds.ago } })
+      def seek_consumer_group(consumer_group_id, topics_with_partitions_and_offsets)
+        tpl_base = {}
+
+        # Normalize the data so we always have all partitions and topics in the same format
+        # That is in a format where we have topics and all partitions with their per partition
+        # assigned offsets
+        topics_with_partitions_and_offsets.each do |topic, partitions_with_offsets|
+          tpl_base[topic] = {}
+
+          if partitions_with_offsets.is_a?(Hash)
+            tpl_base[topic] = partitions_with_offsets
+          else
+            topic(topic)[:partition_count].times do |partition|
+              tpl_base[topic][partition] = partitions_with_offsets
+            end
+          end
+        end
+
+        tpl = Rdkafka::Consumer::TopicPartitionList.new
+        # In case of time based location, we need to to a pre-resolution, that's why we keep it
+        # separately
+        time_tpl = Rdkafka::Consumer::TopicPartitionList.new
+
+        # Distribute properly the offset type
+        tpl_base.each do |topic, partitions_with_offsets|
+          partitions_with_offsets.each do |partition, offset|
+            target = offset.is_a?(Time) ? time_tpl : tpl
+            target.add_topic_and_partitions_with_offsets(topic, [[partition, offset]])
+          end
+        end
+
+        # We set this that way so we can impersonate this consumer group and seek where we want
+        mapped_consumer_group_id = app_config.consumer_mapper.call(consumer_group_id)
+        settings = { 'group.id': mapped_consumer_group_id }
+
+        with_consumer(settings) do |consumer|
+          # If we have any time based stuff to resolve, we need to do it prior to commits
+          unless time_tpl.empty?
+            real_offsets = consumer.offsets_for_times(time_tpl)
+
+            real_offsets.to_h.each do |name, results|
+              results.each do |result|
+                raise(Errors::InvalidTimeBasedOffsetError) unless result
+
+                partition = result.partition
+
+                # Negative offset means we're beyond last message and we need to query for the
+                # high watermark offset to get the most recent offset and move there
+                if result.offset.negative?
+                  _, offset = consumer.query_watermark_offsets(name, result.partition)
+                else
+                  # If we get an offset, it means there existed a message close to this time
+                  # location
+                  offset = result.offset
+                end
+
+                # Since now we have proper offsets, we can add this to the final tpl for commit
+                tpl.add_topic_and_partitions_with_offsets(name, [[partition, offset]])
+              end
+            end
+          end
+
+          consumer.commit(tpl, false)
+        end
+      end
+
+      # Removes given consumer group (if exists)
+      #
+      # @param consumer_group_id [String] consumer group name without the mapper name (if any used)
+      #
+      # @note Please note, Karafka will apply the consumer group mapper on the provided consumer
+      #   group.
+      #
+      # @note This method should not be used on a running consumer group as it will not yield any
+      #   results.
+      def delete_consumer_group(consumer_group_id)
+        mapped_consumer_group_id = app_config.consumer_mapper.call(consumer_group_id)
+
+        with_admin do |admin|
+          handler = admin.delete_group(mapped_consumer_group_id)
+          handler.wait(max_wait_timeout: app_config.admin.max_wait_time)
+        end
+      end
+
       # Fetches the watermark offsets for a given topic partition
       #
       # @param name [String, Symbol] topic name
@@ -264,7 +367,7 @@ module Karafka
       # @param settings [Hash] extra settings for config (if needed)
       # @return [::Rdkafka::Config] rdkafka config
       def config(type, settings)
-        group_id = app_config.consumer_mapper.call(
+        mapped_admin_group_id = app_config.consumer_mapper.call(
           app_config.admin.group_id
         )
 
@@ -272,8 +375,11 @@ module Karafka
           .kafka
           .then(&:dup)
          .merge(app_config.admin.kafka)
+          .tap { |config| config[:'group.id'] = mapped_admin_group_id }
+          # We merge after setting the group id so it can be altered if needed
+          # In general in admin we only should alter it when we need to impersonate a given
+          # consumer group or do something similar
          .merge!(settings)
-          .tap { |config| config[:'group.id'] = group_id }
          .then { |config| Karafka::Setup::AttributesMap.public_send(type, config) }
          .then { |config| ::Rdkafka::Config.new(config) }
       end
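For orientation, here is a rough usage sketch of the two admin methods added above. The calls mirror the `@example` annotations in the diff; the topic name (`orders`) and consumer group id (`example_app.consumers`) are made up for illustration, and `seek_consumer_group` should not be run against a live consumer group.

```ruby
# Illustrative names only; the API calls themselves come from the diff above.

# Move partition 1 of 'orders' to offset 100 for the given consumer group
Karafka::Admin.seek_consumer_group('example_app.consumers', { 'orders' => { 1 => 100 } })

# Move all partitions of 'orders' to the first offsets from roughly 5 minutes ago
Karafka::Admin.seek_consumer_group('example_app.consumers', { 'orders' => Time.now - 300 })

# Remove a consumer group that is no longer in use
Karafka::Admin.delete_consumer_group('example_app.consumers')
```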
data/lib/karafka/app.rb
CHANGED
@@ -36,6 +36,13 @@ module Karafka
       # Just a nicer name for the consumer groups
       alias routes consumer_groups
 
+      # Returns current assignments of this process. Both topics and partitions
+      #
+      # @return [Hash<Karafka::Routing::Topic, Array<Integer>>]
+      def assignments
+        Instrumentation::AssignmentsTracker.instance.current
+      end
+
       # Allow for easier status management via `Karafka::App` by aliasing status methods here
       Status::STATES.each do |state, transition|
         class_eval <<~RUBY, __FILE__, __LINE__ + 1
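The new `Karafka::App.assignments` reader shown above exposes the assignments tracker introduced later in this diff. A minimal sketch of inspecting it (it assumes `Karafka::Routing::Topic#name` for display, which Karafka provides elsewhere):

```ruby
# Returns Hash<Karafka::Routing::Topic, Array<Integer>>
Karafka::App.assignments.each do |topic, partitions|
  puts "#{topic.name}: partitions #{partitions.join(', ')}"
end
```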
data/lib/karafka/base_consumer.rb
CHANGED
@@ -34,16 +34,9 @@ module Karafka
     # @note This should not be used by the end users as it is part of the lifecycle of things and
     #   not as a part of the public api. This should not perform any extensive operations as it is
     #   blocking and running in the listener thread.
-    def on_before_enqueue
+    def on_before_schedule_consume
       @used = true
-      handle_before_enqueue
-    rescue StandardError => e
-      Karafka.monitor.instrument(
-        'error.occurred',
-        error: e,
-        caller: self,
-        type: 'consumer.before_enqueue.error'
-      )
+      handle_before_schedule_consume
     end
 
     # Can be used to run preparation code in the worker
@@ -59,13 +52,6 @@ module Karafka
       # We run this after the full metadata setup, so we can use all the messages information
       # if needed
       handle_before_consume
-    rescue StandardError => e
-      Karafka.monitor.instrument(
-        'error.occurred',
-        error: e,
-        caller: self,
-        type: 'consumer.before_consume.error'
-      )
     end
 
     # Executes the default consumer flow.
@@ -94,13 +80,13 @@ module Karafka
     #   not as part of the public api.
     def on_after_consume
       handle_after_consume
-    rescue StandardError => e
-      Karafka.monitor.instrument(
-        'error.occurred',
-        error: e,
-        caller: self,
-        type: 'consumer.after_consume.error'
-      )
+    end
+
+    # Can be used to run code prior to scheduling of idle execution
+    #
+    # @private
+    def on_before_schedule_idle
+      handle_before_schedule_idle
     end
 
     # Trigger method for running on idle runs without messages
@@ -108,13 +94,13 @@ module Karafka
     # @private
     def on_idle
       handle_idle
-    rescue StandardError => e
-      Karafka.monitor.instrument(
-        'error.occurred',
-        error: e,
-        caller: self,
-        type: 'consumer.idle.error'
-      )
+    end
+
+    # Can be used to run code prior to scheduling of revoked execution
+    #
+    # @private
+    def on_before_schedule_revoked
+      handle_before_schedule_revoked
     end
 
     # Trigger method for running on partition revocation.
@@ -131,6 +117,13 @@ module Karafka
       )
     end
 
+    # Can be used to run code prior to scheduling of revoked execution
+    #
+    # @private
+    def on_before_schedule_shutdown
+      handle_before_schedule_shutdown
+    end
+
     # Trigger method for running on shutdown.
     #
     # @private
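With the per-hook `rescue` blocks removed from the consumer, errors from these lifecycle steps surface through the `error.occurred` instrumentation instead (the `consumer.before_schedule.error` type appears in the Datadog listener change later in this diff). A hedged subscription sketch:

```ruby
# Sketch only: the error type names are taken from the logger listener changes
# shown elsewhere in this diff.
Karafka.monitor.subscribe('error.occurred') do |event|
  case event[:type]
  when 'consumer.before_schedule.error'
    Karafka.logger.error("Before-schedule hook failed: #{event[:error]}")
  end
end
```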
data/lib/karafka/connection/client.rb
CHANGED
@@ -45,10 +45,7 @@ module Karafka
       @buffer = RawMessagesBuffer.new
       @tick_interval = ::Karafka::App.config.internal.tick_interval
       @rebalance_manager = RebalanceManager.new(@subscription_group.id)
-      @rebalance_callback = Instrumentation::Callbacks::Rebalance.new(
-        @subscription_group.id,
-        @subscription_group.consumer_group.id
-      )
+      @rebalance_callback = Instrumentation::Callbacks::Rebalance.new(@subscription_group)
       @events_poller = Helpers::IntervalRunner.new { events_poll }
       @kafka = build_consumer
       # There are few operations that can happen in parallel from the listener threads as well
@@ -309,12 +306,18 @@ module Karafka
 
     # Closes and resets the client completely.
     def reset
-      close
-
-      @events_poller.reset
-      @closed = false
-      @paused_tpls.clear
-      @kafka = build_consumer
+      Karafka.monitor.instrument(
+        'client.reset',
+        caller: self,
+        subscription_group: @subscription_group
+      ) do
+        close
+
+        @events_poller.reset
+        @closed = false
+        @paused_tpls.clear
+        @kafka = build_consumer
+      end
     end
 
     # Runs a single poll on the main queue and consumer queue ignoring all the potential errors
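Since `#reset` is now wrapped in instrumentation, the new `client.reset` event (registered in the notifications change below) can be observed, and its payload carries the subscription group. A minimal subscriber sketch:

```ruby
Karafka.monitor.subscribe('client.reset') do |event|
  sg = event[:subscription_group]
  Karafka.logger.warn("Kafka client reset for subscription group #{sg.id}")
end
```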
data/lib/karafka/connection/consumer_group_coordinator.rb
CHANGED
@@ -16,7 +16,7 @@ module Karafka
     class ConsumerGroupCoordinator
       # @param group_size [Integer] number of separate subscription groups in a consumer group
       def initialize(group_size)
-        @shutdown_lock = Mutex.new
+        @shutdown_mutex = Mutex.new
         @group_size = group_size
         @finished = Set.new
       end
@@ -30,12 +30,12 @@ module Karafka
       # @return [Boolean] can we start shutdown on a given listener
       # @note If true, will also obtain a lock so no-one else will be closing the same time we do
       def shutdown?
-        finished? && @shutdown_lock.try_lock
+        finished? && @shutdown_mutex.try_lock
       end
 
       # Unlocks the shutdown lock
       def unlock
-        @shutdown_lock.unlock if @shutdown_lock.owned?
+        @shutdown_mutex.unlock if @shutdown_mutex.owned?
       end
 
       # Marks given listener as finished
data/lib/karafka/connection/listener.rb
CHANGED
@@ -23,8 +23,9 @@ module Karafka
       # @param consumer_group_coordinator [Karafka::Connection::ConsumerGroupCoordinator]
       # @param subscription_group [Karafka::Routing::SubscriptionGroup]
       # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
+      # @param scheduler [Karafka::Processing::Scheduler] scheduler we want to use
       # @return [Karafka::Connection::Listener] listener instance
-      def initialize(consumer_group_coordinator, subscription_group, jobs_queue)
+      def initialize(consumer_group_coordinator, subscription_group, jobs_queue, scheduler)
         proc_config = ::Karafka::App.config.internal.processing
 
         @id = SecureRandom.hex(6)
@@ -36,8 +37,7 @@ module Karafka
         @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
         @jobs_builder = proc_config.jobs_builder
         @partitioner = proc_config.partitioner_class.new(subscription_group)
-        # We reference scheduler here as it is much faster than fetching this each time
-        @scheduler = proc_config.scheduler
+        @scheduler = scheduler
         @events_poller = Helpers::IntervalRunner.new { @client.events_poll }
         # We keep one buffer for messages to preserve memory and not allocate extra objects
         # We can do this that way because we always first schedule jobs using messages before we
@@ -45,6 +45,8 @@ module Karafka
         @messages_buffer = MessagesBuffer.new(subscription_group)
         @mutex = Mutex.new
         @stopped = false
+
+        @jobs_queue.register(@subscription_group.id)
       end
 
       # Runs the main listener fetch loop.
@@ -230,7 +232,7 @@ module Karafka
         #   here. In cases like this, we do not run a revocation job
         @executors.find_all(topic, partition).each do |executor|
           job = @jobs_builder.revoked(executor)
-          job.before_enqueue
+          job.before_schedule
           jobs << job
         end
 
@@ -243,7 +245,7 @@ module Karafka
           end
         end
 
-        @scheduler.schedule_revocation(@jobs_queue, jobs)
+        @scheduler.on_schedule_revocation(jobs)
       end
 
       # Enqueues the shutdown jobs for all the executors that exist in our subscription group
@@ -252,11 +254,11 @@ module Karafka
 
         @executors.each do |executor|
           job = @jobs_builder.shutdown(executor)
-          job.before_enqueue
+          job.before_schedule
           jobs << job
         end
 
-        @scheduler.schedule_shutdown(@jobs_queue, jobs)
+        @scheduler.on_schedule_shutdown(jobs)
       end
 
       # Polls messages within the time and amount boundaries defined in the settings and then
@@ -296,14 +298,17 @@ module Karafka
           end
         end
 
-        jobs.each(&:before_enqueue)
+        jobs.each(&:before_schedule)
 
-        @scheduler.schedule_consumption(@jobs_queue, jobs)
+        @scheduler.on_schedule_consumption(jobs)
       end
 
       # Waits for all the jobs from a given subscription group to finish before moving forward
       def wait
-        @jobs_queue.wait(@subscription_group.id)
+        @jobs_queue.wait(@subscription_group.id) do
+          @events_poller.call
+          @scheduler.on_manage
+        end
       end
 
       # Waits without blocking the polling
@@ -319,6 +324,8 @@ module Karafka
       def wait_pinging(wait_until:, after_ping: -> {})
         until wait_until.call
           @client.ping
+          @scheduler.on_manage
+
           after_ping.call
           sleep(0.2)
         end
@@ -334,6 +341,7 @@ module Karafka
         #   resetting.
         @jobs_queue.wait(@subscription_group.id)
         @jobs_queue.clear(@subscription_group.id)
+        @scheduler.on_clear(@subscription_group.id)
        @events_poller.reset
        @client.reset
        @coordinators.reset
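The listener no longer builds its scheduler from the config and instead drives an injected one through `on_schedule_*`, `on_manage` and `on_clear`. A minimal FIFO sketch of that interface, inferred only from the calls visible above (the shipped default lives in `karafka/processing/schedulers/default.rb` and is not reproduced here):

```ruby
# Hypothetical class name; method names are taken from the listener calls above.
class FifoScheduler
  # @param queue [Karafka::Processing::JobsQueue] queue jobs should be pushed to
  def initialize(queue)
    @queue = queue
  end

  # Schedules consumption jobs in FIFO order
  def on_schedule_consumption(jobs_array)
    jobs_array.each { |job| @queue << job }
  end

  # Revocation and shutdown jobs are scheduled the same way in this sketch
  alias on_schedule_revocation on_schedule_consumption
  alias on_schedule_shutdown on_schedule_consumption

  # Invoked on wait/ping ticks; a passive FIFO scheduler has nothing to manage
  def on_manage; end

  # Invoked when a subscription group queue is cleared
  def on_clear(_subscription_group_id); end
end
```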
data/lib/karafka/connection/listeners_batch.rb
CHANGED
@@ -11,6 +11,10 @@ module Karafka
       # @param jobs_queue [JobsQueue]
       # @return [ListenersBatch]
       def initialize(jobs_queue)
+        # We need one scheduler for all the listeners because in case of complex schedulers, they
+        # should be able to distribute work whenever any work is done in any of the listeners
+        scheduler = App.config.internal.processing.scheduler_class.new(jobs_queue)
+
         @coordinators = []
 
         @batch = App.subscription_groups.flat_map do |_consumer_group, subscription_groups|
@@ -24,7 +28,8 @@ module Karafka
           Connection::Listener.new(
             consumer_group_coordinator,
             subscription_group,
-            jobs_queue
+            jobs_queue,
+            scheduler
           )
         end
       end
data/lib/karafka/contracts/config.rb
CHANGED
@@ -73,7 +73,8 @@ module Karafka
 
         nested(:processing) do
           required(:jobs_builder) { |val| !val.nil? }
-          required(:scheduler) { |val| !val.nil? }
+          required(:jobs_queue_class) { |val| !val.nil? }
+          required(:scheduler_class) { |val| !val.nil? }
           required(:coordinator_class) { |val| !val.nil? }
           required(:partitioner_class) { |val| !val.nil? }
           required(:strategy_selector) { |val| !val.nil? }
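The contract now validates `jobs_queue_class` and `scheduler_class` instead of a `scheduler` instance. For illustration, this is roughly where those settings live; the values shown are assumed to be the OSS defaults (Pro swaps them in its loader) and applications normally leave them untouched:

```ruby
class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka = { 'bootstrap.servers': '127.0.0.1:9092' }

    # Internal knobs validated by the contract above; shown only for illustration.
    config.internal.processing.jobs_queue_class = Karafka::Processing::JobsQueue
    config.internal.processing.scheduler_class = Karafka::Processing::Schedulers::Default
  end
end
```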
data/lib/karafka/instrumentation/assignments_tracker.rb
ADDED
@@ -0,0 +1,96 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Instrumentation
+    # Keeps track of active assignments and materializes them by returning the routing topics
+    # with appropriate partitions that are assigned at a given moment
+    #
+    # It is auto-subscribed as part of Karafka itself.
+    #
+    # It is not heavy from the computational point of view, as it only operates during rebalances.
+    #
+    # We keep assignments as flat topics structure because we can go from topics to both
+    # subscription and consumer groups if needed.
+    class AssignmentsTracker
+      include Singleton
+
+      def initialize
+        @mutex = Mutex.new
+        @assignments = Hash.new { |hash, key| hash[key] = [] }
+      end
+
+      # Returns all the active/current assignments of this given process
+      #
+      # @return [Hash<Karafka::Routing::Topic, Array<Integer>>]
+      #
+      # @note Keep in mind, that those assignments can change any time, especially when working
+      #   with multiple consumer groups or subscription groups.
+      #
+      # @note We return a copy because we modify internals and we do not want user to tamper with
+      #   the data accidentally
+      def current
+        assignments = {}
+
+        @assignments.each do |topic, partitions|
+          assignments[topic] = partitions.dup.freeze
+        end
+
+        assignments.freeze
+      end
+
+      # Clears all the assignments
+      def clear
+        @mutex.synchronize do
+          @assignments.clear
+        end
+      end
+
+      # When client is under reset due to critical issues, remove all of its assignments as we will
+      #   get a new set of assignments
+      # @param event [Karafka::Core::Monitoring::Event]
+      def on_client_reset(event)
+        sg = event[:subscription_group]
+
+        @mutex.synchronize do
+          @assignments.delete_if do |topic, _partitions|
+            topic.subscription_group.id == sg.id
+          end
+        end
+      end
+
+      # Removes partitions from the current assignments hash
+      #
+      # @param event [Karafka::Core::Monitoring::Event]
+      def on_rebalance_partitions_revoked(event)
+        sg = event[:subscription_group]
+
+        @mutex.synchronize do
+          event[:tpl].to_h.each do |topic, partitions|
+            topic = sg.topics.find(topic)
+
+            @assignments[topic] -= partitions.map(&:partition)
+            @assignments[topic].sort!
+            # Remove completely topics for which we do not have any assignments left
+            @assignments.delete_if { |_topic, cur_partitions| cur_partitions.empty? }
+          end
+        end
+      end
+
+      # # Adds partitions to the current assignments hash
+      #
+      # @param event [Karafka::Core::Monitoring::Event]
+      def on_rebalance_partitions_assigned(event)
+        sg = event[:subscription_group]
+
+        @mutex.synchronize do
+          event[:tpl].to_h.each do |topic, partitions|
+            topic = sg.topics.find(topic)
+
+            @assignments[topic] += partitions.map(&:partition)
+            @assignments[topic].sort!
+          end
+        end
+      end
+    end
+  end
+end
data/lib/karafka/instrumentation/callbacks/rebalance.rb
CHANGED
@@ -6,11 +6,10 @@ module Karafka
       # Callback that connects to the librdkafka rebalance callback and converts those events into
       # our internal events
       class Rebalance
-        # @param
-        #
-        def initialize(subscription_group_id, consumer_group_id)
-          @subscription_group_id = subscription_group_id
-          @consumer_group_id = consumer_group_id
+        # @param subscription_group [Karafka::Routes::SubscriptionGroup] subscription group for
+        #   which we want to manage rebalances
+        def initialize(subscription_group)
+          @subscription_group = subscription_group
         end
 
         # Publishes an event that partitions are going to be revoked.
@@ -53,8 +52,12 @@ module Karafka
           ::Karafka.monitor.instrument(
             "rebalance.#{name}",
             caller: self,
-            subscription_group_id: @subscription_group_id,
-            consumer_group_id: @consumer_group_id,
+            # We keep the id references here for backwards compatibility as some of the monitors
+            # may use the id references
+            subscription_group_id: @subscription_group.id,
+            subscription_group: @subscription_group,
+            consumer_group_id: @subscription_group.consumer_group.id,
+            consumer_group: @subscription_group.consumer_group,
             tpl: tpl
           )
         end
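The rebalance events now carry the subscription group and consumer group objects, while the previous id keys remain for backwards compatibility. A subscriber sketch using both:

```ruby
Karafka.monitor.subscribe('rebalance.partitions_assigned') do |event|
  # Object references added in this change
  sg = event[:subscription_group]
  cg = event[:consumer_group]
  # Id kept for backwards compatibility
  legacy_id = event[:subscription_group_id]

  Karafka.logger.info(
    "Rebalance in #{cg.id} / #{sg.id} (legacy id: #{legacy_id}), " \
    "topics: #{event[:tpl].to_h.keys.join(', ')}"
  )
end
```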
data/lib/karafka/instrumentation/logger_listener.rb
CHANGED
@@ -241,15 +241,6 @@ module Karafka
       when 'consumer.revoked.error'
         error "Consumer on revoked failed due to an error: #{error}"
         error details
-      when 'consumer.before_enqueue.error'
-        error "Consumer before enqueue failed due to an error: #{error}"
-        error details
-      when 'consumer.before_consume.error'
-        error "Consumer before consume failed due to an error: #{error}"
-        error details
-      when 'consumer.after_consume.error'
-        error "Consumer after consume failed due to an error: #{error}"
-        error details
       when 'consumer.idle.error'
         error "Consumer idle failed due to an error: #{error}"
         error details
data/lib/karafka/instrumentation/notifications.rb
CHANGED
@@ -20,6 +20,7 @@ module Karafka
         active_job.consume
         active_job.consumed
 
+        app.initializing
         app.initialized
         app.running
         app.quieting
@@ -30,26 +31,28 @@ module Karafka
 
         client.pause
         client.resume
+        client.reset
 
         connection.listener.before_fetch_loop
         connection.listener.fetch_loop
         connection.listener.fetch_loop.received
 
-        connection.client.poll.error
-        connection.client.unsubscribe.error
-
         rebalance.partitions_assign
         rebalance.partitions_assigned
         rebalance.partitions_revoke
         rebalance.partitions_revoked
 
+        consumer.before_schedule_consume
         consumer.consume
         consumer.consumed
         consumer.consuming.pause
         consumer.consuming.retry
+        consumer.before_schedule_idle
         consumer.idle
+        consumer.before_schedule_revoked
         consumer.revoke
         consumer.revoked
+        consumer.before_schedule_shutdown
         consumer.shutting_down
         consumer.shutdown
 
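With the `consumer.before_schedule_*` events registered above, they can be subscribed to like any other notification. A sketch, assuming (as with other `consumer.*` events) that the payload exposes the consumer instance under `caller`:

```ruby
Karafka.monitor.subscribe('consumer.before_schedule_consume') do |event|
  # Assumption: the payload carries the consumer as :caller, like other consumer events
  consumer = event[:caller]
  Karafka.logger.debug("Scheduling consumption for #{consumer.topic.name}")
end
```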
data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb
CHANGED
@@ -94,8 +94,8 @@ module Karafka
           error "Consumer consuming error: #{error}"
         when 'consumer.revoked.error'
           error "Consumer on revoked failed due to an error: #{error}"
-        when 'consumer.before_enqueue.error'
-          error "Consumer before enqueue failed due to an error: #{error}"
+        when 'consumer.before_schedule.error'
+          error "Consumer before schedule failed due to an error: #{error}"
         when 'consumer.before_consume.error'
           error "Consumer before consume failed due to an error: #{error}"
         when 'consumer.after_consume.error'