karafka 2.2.14 → 2.3.0.alpha1
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +38 -12
- data/.ruby-version +1 -1
- data/CHANGELOG.md +23 -0
- data/Gemfile.lock +12 -12
- data/README.md +0 -2
- data/SECURITY.md +23 -0
- data/config/locales/errors.yml +7 -1
- data/config/locales/pro_errors.yml +22 -0
- data/docker-compose.yml +1 -1
- data/karafka.gemspec +2 -2
- data/lib/karafka/admin/acl.rb +287 -0
- data/lib/karafka/admin.rb +9 -13
- data/lib/karafka/app.rb +5 -3
- data/lib/karafka/base_consumer.rb +9 -1
- data/lib/karafka/cli/base.rb +1 -1
- data/lib/karafka/connection/client.rb +83 -76
- data/lib/karafka/connection/conductor.rb +28 -0
- data/lib/karafka/connection/listener.rb +159 -42
- data/lib/karafka/connection/listeners_batch.rb +5 -11
- data/lib/karafka/connection/manager.rb +72 -0
- data/lib/karafka/connection/messages_buffer.rb +12 -0
- data/lib/karafka/connection/proxy.rb +17 -0
- data/lib/karafka/connection/status.rb +75 -0
- data/lib/karafka/contracts/config.rb +14 -10
- data/lib/karafka/contracts/consumer_group.rb +9 -1
- data/lib/karafka/contracts/topic.rb +3 -1
- data/lib/karafka/errors.rb +13 -0
- data/lib/karafka/instrumentation/logger_listener.rb +3 -0
- data/lib/karafka/instrumentation/notifications.rb +13 -5
- data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +31 -28
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +20 -1
- data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +15 -12
- data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +39 -36
- data/lib/karafka/pro/base_consumer.rb +47 -0
- data/lib/karafka/pro/connection/manager.rb +300 -0
- data/lib/karafka/pro/connection/multiplexing/listener.rb +40 -0
- data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
- data/lib/karafka/pro/iterator.rb +1 -6
- data/lib/karafka/pro/loader.rb +14 -0
- data/lib/karafka/pro/processing/coordinator.rb +2 -1
- data/lib/karafka/pro/processing/executor.rb +37 -0
- data/lib/karafka/pro/processing/expansions_selector.rb +32 -0
- data/lib/karafka/pro/processing/jobs/periodic.rb +41 -0
- data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +32 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +14 -3
- data/lib/karafka/pro/processing/offset_metadata/consumer.rb +44 -0
- data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +131 -0
- data/lib/karafka/pro/processing/offset_metadata/listener.rb +46 -0
- data/lib/karafka/pro/processing/schedulers/base.rb +39 -23
- data/lib/karafka/pro/processing/schedulers/default.rb +12 -14
- data/lib/karafka/pro/processing/strategies/default.rb +134 -1
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +35 -0
- data/lib/karafka/pro/processing/strategies/vp/default.rb +59 -25
- data/lib/karafka/pro/processing/virtual_offset_manager.rb +41 -11
- data/lib/karafka/pro/routing/features/long_running_job/topic.rb +2 -0
- data/lib/karafka/pro/routing/features/multiplexing/config.rb +38 -0
- data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +114 -0
- data/lib/karafka/pro/routing/features/multiplexing/patches/contracts/consumer_group.rb +42 -0
- data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +38 -0
- data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +42 -0
- data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +40 -0
- data/lib/karafka/pro/routing/features/multiplexing.rb +59 -0
- data/lib/karafka/pro/routing/features/non_blocking_job/topic.rb +32 -0
- data/lib/karafka/pro/routing/features/non_blocking_job.rb +37 -0
- data/lib/karafka/pro/routing/features/offset_metadata/config.rb +33 -0
- data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +42 -0
- data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +65 -0
- data/lib/karafka/pro/routing/features/offset_metadata.rb +40 -0
- data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +4 -0
- data/lib/karafka/pro/routing/features/patterns/detector.rb +18 -10
- data/lib/karafka/pro/routing/features/periodic_job/config.rb +37 -0
- data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +44 -0
- data/lib/karafka/pro/routing/features/periodic_job/topic.rb +94 -0
- data/lib/karafka/pro/routing/features/periodic_job.rb +27 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +7 -2
- data/lib/karafka/process.rb +5 -3
- data/lib/karafka/processing/coordinator.rb +5 -1
- data/lib/karafka/processing/executor.rb +16 -10
- data/lib/karafka/processing/executors_buffer.rb +19 -4
- data/lib/karafka/processing/schedulers/default.rb +3 -2
- data/lib/karafka/processing/strategies/default.rb +6 -0
- data/lib/karafka/processing/strategies/dlq.rb +36 -0
- data/lib/karafka/routing/builder.rb +12 -2
- data/lib/karafka/routing/consumer_group.rb +5 -5
- data/lib/karafka/routing/features/base.rb +44 -8
- data/lib/karafka/routing/features/dead_letter_queue/config.rb +6 -1
- data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
- data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -2
- data/lib/karafka/routing/subscription_group.rb +2 -2
- data/lib/karafka/routing/subscription_groups_builder.rb +11 -2
- data/lib/karafka/routing/topic.rb +8 -10
- data/lib/karafka/runner.rb +13 -3
- data/lib/karafka/server.rb +5 -9
- data/lib/karafka/setup/config.rb +17 -0
- data/lib/karafka/status.rb +23 -14
- data/lib/karafka/templates/karafka.rb.erb +7 -0
- data/lib/karafka/time_trackers/partition_usage.rb +56 -0
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +42 -10
- metadata.gz.sig +0 -0
- data/lib/karafka/connection/consumer_group_coordinator.rb +0 -48
data/lib/karafka/connection/listener.rb
CHANGED

@@ -7,6 +7,8 @@ module Karafka
     # critical errors by restarting everything in a safe manner.
     #
     # This is the heart of the consumption process.
+    #
+    # It provides async API for managing, so all status changes are expected to be async.
     class Listener
       include Helpers::Async
 
@@ -14,22 +16,23 @@ module Karafka
       # @return [String] id of this listener
       attr_reader :id
 
+      # @return [Karafka::Routing::SubscriptionGroup] subscription group that this listener handles
+      attr_reader :subscription_group
+
       # How long to wait in the initial events poll. Increases chances of having the initial events
       # immediately available
       INITIAL_EVENTS_POLL_TIMEOUT = 100
 
       private_constant :INITIAL_EVENTS_POLL_TIMEOUT
 
-      # @param consumer_group_coordinator [Karafka::Connection::ConsumerGroupCoordinator]
       # @param subscription_group [Karafka::Routing::SubscriptionGroup]
       # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
       # @param scheduler [Karafka::Processing::Scheduler] scheduler we want to use
       # @return [Karafka::Connection::Listener] listener instance
-      def initialize(consumer_group_coordinator, subscription_group, jobs_queue, scheduler)
+      def initialize(subscription_group, jobs_queue, scheduler)
         proc_config = ::Karafka::App.config.internal.processing
 
         @id = SecureRandom.hex(6)
-        @consumer_group_coordinator = consumer_group_coordinator
         @subscription_group = subscription_group
         @jobs_queue = jobs_queue
         @coordinators = Processing::CoordinatorsBuffer.new(subscription_group.topics)
@@ -43,8 +46,9 @@ module Karafka
         # We can do this that way because we always first schedule jobs using messages before we
         # fetch another batch.
         @messages_buffer = MessagesBuffer.new(subscription_group)
+        @usage_tracker = TimeTrackers::PartitionUsage.new
         @mutex = Mutex.new
-        @stopped = false
+        @status = Status.new
 
         @jobs_queue.register(@subscription_group.id)
       end
@@ -62,6 +66,44 @@ module Karafka
         )
 
         fetch_loop
+
+        Karafka.monitor.instrument(
+          'connection.listener.after_fetch_loop',
+          caller: self,
+          client: @client,
+          subscription_group: @subscription_group
+        )
+      end
+
+      # Aliases all statuses operations directly on the listener so we have a listener-facing API
+      Status::STATES.each do |state, transition|
+        # @return [Boolean] is the listener in a given state
+        define_method "#{state}?" do
+          @status.public_send("#{state}?")
+        end
+
+        # Moves listener to a given state
+        define_method transition do
+          @status.public_send(transition)
+        end
+      end
+
+      # @return [Boolean] is this listener active (not stopped and not pending)
+      def active?
+        @status.active?
+      end
+
+      # We overwrite the state `#start` because on start we need to also start running listener in
+      # the async thread. While other state transitions happen automatically and status state
+      # change is enough, here we need to run the background threads
+      def start!
+        if stopped?
+          @client.reset
+          @status.reset!
+        end
+
+        @status.start!
+        async_call
       end
 
       # Stops the jobs queue, triggers shutdown on all the executors (sync), commits offsets and
@@ -72,13 +114,16 @@ module Karafka
       #
       # @note We wrap it with a mutex exactly because of the above case of forceful shutdown
       def shutdown
-        return if @stopped
-
         @mutex.synchronize do
-          @stopped = true
+          return if stopped?
+          # Nothing to clear if it was not even running
+          return stopped! if pending?
+
           @executors.clear
           @coordinators.reset
           @client.stop
+
+          stopped!
         end
       end
 
@@ -93,6 +138,7 @@ module Karafka
       # Kafka connections / Internet connection issues / Etc. Business logic problems should not
       # propagate this far.
       def fetch_loop
+        running!
         # Run the initial events fetch to improve chances of having metrics and initial callbacks
         # triggers on start.
         #
@@ -103,7 +149,7 @@ module Karafka
        @client.events_poll(INITIAL_EVENTS_POLL_TIMEOUT)
 
         # Run the main loop as long as we are not stopping or moving into quiet mode
-        until Karafka::App.done?
+        while running?
           Karafka.monitor.instrument(
             'connection.listener.fetch_loop',
             caller: self,
@@ -138,7 +184,11 @@ module Karafka
           # simplifies the overall design and prevents from race conditions
           wait
 
-          build_and_schedule_consumption_jobs
+          build_and_schedule_flow_jobs
+
+          # periodic jobs never run on topics and partitions that were scheduled, so no risk in
+          # having collective wait after both
+          build_and_schedule_periodic_jobs if Karafka.pro?
 
           wait
         end
@@ -170,18 +220,11 @@ module Karafka
         # Wait until all the shutdown jobs are done
         wait_pinging(wait_until: -> { @jobs_queue.empty?(@subscription_group.id) })
 
-        # Once all the work is done, we need to decrement counter of listeners that do the work
-        # within this consumer group
-        @consumer_group_coordinator.finish_work(id)
+        quieted!
 
         # Wait if we're in the process of finishing started work or finished all the work and
         # just sitting and being quiet
-        wait_pinging(wait_until: -> { !(Karafka::App.quieting? || Karafka::App.quiet?) })
-
-        # We need to wait until all the work in the whole consumer group (local to the process)
-        # is done. Otherwise we may end up with locks and `Timed out LeaveGroupRequest in flight`
-        # warning notifications.
-        wait_pinging(wait_until: -> { @consumer_group_coordinator.shutdown? })
+        wait_pinging(wait_until: -> { !quiet? })
 
         # This extra ping will make sure we've refreshed the rebalance state after other instances
         # potentially shutdown. This will prevent us from closing with a dangling callback
@@ -200,11 +243,9 @@ module Karafka
           type: 'connection.listener.fetch_loop.error'
         )
 
-        restart
+        reset
 
         sleep(1) && retry
-      ensure
-        @consumer_group_coordinator.unlock
       end
 
       # Resumes processing of partitions that were paused due to an error.
@@ -214,6 +255,17 @@ module Karafka
         end
       end
 
+      # Polls messages within the time and amount boundaries defined in the settings and then
+      # builds karafka messages based on the raw rdkafka messages buffer returned by the
+      # `#batch_poll` method.
+      #
+      # @note There are two buffers, one for raw messages and one for "built" karafka messages
+      def poll_and_remap_messages
+        @messages_buffer.remap(
+          @client.batch_poll
+        )
+      end
+
       # Enqueues revoking jobs for partitions that were taken away from the running process.
       def build_and_schedule_revoked_jobs_for_revoked_partitions
         revoked_partitions = @client.rebalance_manager.revoked_partitions
@@ -225,6 +277,7 @@ module Karafka
 
         revoked_partitions.each do |topic, partitions|
           partitions.each do |partition|
+            @usage_tracker.revoke(topic, partition)
            @coordinators.revoke(topic, partition)
 
             # There may be a case where we have lost partition of which data we have never
@@ -232,7 +285,6 @@ module Karafka
             # here. In cases like this, we do not run a revocation job
             @executors.find_all(topic, partition).each do |executor|
               job = @jobs_builder.revoked(executor)
-              job.before_schedule
               jobs << job
             end
 
@@ -245,6 +297,9 @@ module Karafka
           end
         end
 
+        return if jobs.empty?
+
+        jobs.each(&:before_schedule)
         @scheduler.on_schedule_revocation(jobs)
       end
 
@@ -254,32 +309,27 @@ module Karafka
 
         @executors.each do |executor|
           job = @jobs_builder.shutdown(executor)
-          job.before_schedule
           jobs << job
         end
 
-        @scheduler.on_schedule_shutdown(jobs)
-      end
+        return if jobs.empty?
 
-      # Polls messages within the time and amount boundaries defined in the settings and then
-      # builds karafka messages based on the raw rdkafka messages buffer returned by the
-      # `#batch_poll` method.
-      #
-      # @note There are two buffers, one for raw messages and one for "built" karafka messages
-      def poll_and_remap_messages
-        @messages_buffer.remap(
-          @client.batch_poll
-        )
+        jobs.each(&:before_schedule)
+        @scheduler.on_schedule_shutdown(jobs)
       end
 
       # Takes the messages per topic partition and enqueues processing jobs in threads using
-      # given scheduler.
-      def build_and_schedule_consumption_jobs
+      # given scheduler. It also handles the idle jobs when filtering API removed all messages
+      # and we need to run house-keeping
+      def build_and_schedule_flow_jobs
         return if @messages_buffer.empty?
 
-        jobs = []
+        consume_jobs = []
+        idle_jobs = []
 
         @messages_buffer.each do |topic, partition, messages|
+          @usage_tracker.track(topic, partition)
+
          coordinator = @coordinators.find_or_create(topic, partition)
           # Start work coordination for this topic partition
           coordinator.start(messages)
@@ -288,19 +338,86 @@ module Karafka
           # and it will not go through a standard lifecycle. Same applies to revoked and shutdown
           if messages.empty?
             executor = @executors.find_or_create(topic, partition, 0, coordinator)
-            jobs << @jobs_builder.idle(executor)
+            idle_jobs << @jobs_builder.idle(executor)
           else
             @partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
               executor = @executors.find_or_create(topic, partition, group_id, coordinator)
               coordinator.increment
-              jobs << @jobs_builder.consume(executor, partition_messages)
+              consume_jobs << @jobs_builder.consume(executor, partition_messages)
             end
           end
         end
 
-        jobs.each(&:before_schedule)
+        # We schedule the idle jobs before running the `#before_schedule` on the consume jobs so
+        # workers can already pick up the idle jobs while the `#before_schedule` on consumption
+        # jobs runs
+        unless idle_jobs.empty?
+          idle_jobs.each(&:before_schedule)
+          @scheduler.on_schedule_idle(idle_jobs)
+        end
+
+        unless consume_jobs.empty?
+          consume_jobs.each(&:before_schedule)
+          @scheduler.on_schedule_consumption(consume_jobs)
+        end
+      end
 
-        @scheduler.on_schedule_consumption(jobs)
+      # Builds and schedules periodic jobs for topics partitions for which no messages were
+      # received recently. In case `Idle` job is invoked, we do not run periodic. Idle means that
+      # a complex flow kicked in and it was a user choice not to run consumption but messages were
+      # shipped.
+      def build_and_schedule_periodic_jobs
+        # Shortcut if periodic jobs are not used at all. No need to run the complex flow when it
+        # will never end up with anything. If periodics on any of the topics are not even defined,
+        # we can finish fast
+        @periodic_jobs ||= @subscription_group.topics.count(&:periodic_job?)
+
+        return if @periodic_jobs.zero?
+
+        jobs = []
+
+        # We select only currently assigned topics and partitions from the current subscription
+        # group as only those are of our interest. We then filter that to only pick those for whom
+        # we want to run periodic jobs and then we select only those that did not receive any
+        # messages recently. This ensures, that we do not tick close to recent arrival of messages
+        # but rather after certain period of inactivity
+        Karafka::App.assignments.each do |topic, partitions|
+          # Skip for assignments not from our subscription group
+          next unless topic.subscription_group == @subscription_group
+          # Skip if this topic does not have periodic jobs enabled
+          next unless topic.periodic_job?
+
+          topic_name = topic.name
+          interval = topic.periodic_job.interval
+
+          partitions.each do |partition|
+            # Skip if we were operating on a given topic partition recently
+            next if @usage_tracker.active?(topic_name, partition, interval)
+
+            coordinator = @coordinators.find_or_create(topic_name, partition)
+
+            # Do not tick if we do not want to tick during pauses
+            next if coordinator.paused? && !topic.periodic_job.during_pause?
+
+            # If we do not want to run periodics during retry flows, we should not
+            # Since this counter is incremented before processing, here it is always -1 from what
+            # we see in the consumer flow. This is why attempt 0 means that we will have first
+            # run (ok) but attempt 1 means, there was an error and we will retry
+            next if coordinator.attempt.positive? && !topic.periodic_job.during_retry?
+
+            # Track so we do not run periodic job again too soon
+            @usage_tracker.track(topic_name, partition)
+
+            @executors.find_all_or_create(topic_name, partition, coordinator).each do |executor|
+              jobs << @jobs_builder.periodic(executor)
+            end
+          end
+        end
+
+        return if jobs.empty?
+
+        jobs.each(&:before_schedule)
+        @scheduler.on_schedule_periodic(jobs)
       end
 
       # Waits for all the jobs from a given subscription group to finish before moving forward
@@ -335,7 +452,7 @@ module Karafka
       # `#fetch_loop` again. We just need to remember to also reset the runner as it is a long
       # running one, so with a new connection to Kafka, we need to initialize the state of the
       # runner and underlying consumers once again.
-      def restart
+      def reset
        # If there was any problem with processing, before we reset things we need to make sure,
        # there are no jobs in the queue. Otherwise it could lead to leakage in between client
        # resetting.
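The `Status::STATES.each` block above generates a listener-facing facade over the status object: one predicate and one transition method per state, all delegating to `@status`. A minimal, self-contained sketch of that metaprogramming pattern (the class names below are illustrative, not from the gem):

```ruby
class MiniStatus
  STATES = { pending: :pending!, running: :running!, stopped: :stopped! }.freeze

  def initialize
    @state = :pending
  end

  STATES.each do |state, transition|
    define_method("#{state}?") { @state == state }
    define_method(transition) { @state = state }
  end
end

class MiniListener
  def initialize
    @status = MiniStatus.new
  end

  # Same aliasing trick as in the diff: every predicate and transition
  # is forwarded to the underlying status object
  MiniStatus::STATES.each do |state, transition|
    define_method("#{state}?") { @status.public_send("#{state}?") }
    define_method(transition) { @status.public_send(transition) }
  end
end

listener = MiniListener.new
listener.running!
listener.running? # => true
listener.stopped? # => false
```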
data/lib/karafka/connection/listeners_batch.rb
CHANGED

@@ -6,8 +6,6 @@ module Karafka
     class ListenersBatch
       include Enumerable
 
-      attr_reader :coordinators
-
       # @param jobs_queue [JobsQueue]
       # @return [ListenersBatch]
       def initialize(jobs_queue)
@@ -15,18 +13,9 @@ module Karafka
         # should be able to distribute work whenever any work is done in any of the listeners
         scheduler = App.config.internal.processing.scheduler_class.new(jobs_queue)
 
-        @coordinators = []
-
         @batch = App.subscription_groups.flat_map do |_consumer_group, subscription_groups|
-          consumer_group_coordinator = Connection::ConsumerGroupCoordinator.new(
-            subscription_groups.size
-          )
-
-          @coordinators << consumer_group_coordinator
-
           subscription_groups.map do |subscription_group|
             Connection::Listener.new(
-              consumer_group_coordinator,
               subscription_group,
               jobs_queue,
               scheduler
@@ -40,6 +29,11 @@ module Karafka
       def each(&block)
         @batch.each(&block)
       end
+
+      # @return [Array<Listener>] active listeners
+      def active
+        select(&:active?)
+      end
     end
   end
 end
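Since `ListenersBatch` includes `Enumerable` and defines `#each`, the new `#active` method is just a bare `select`. A stripped-down sketch of the same design (the `Listener` stand-in below is hypothetical):

```ruby
Listener = Struct.new(:state) do
  def active?
    state != :pending && state != :stopped
  end
end

class Batch
  include Enumerable

  def initialize(listeners)
    @batch = listeners
  end

  # Delegating #each is all Enumerable needs to provide #select, #map, etc.
  def each(&block)
    @batch.each(&block)
  end

  def active
    select(&:active?)
  end
end

batch = Batch.new([Listener.new(:running), Listener.new(:stopped)])
batch.active.size # => 1
```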
data/lib/karafka/connection/manager.rb
ADDED

@@ -0,0 +1,72 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Namespace for Kafka connection related logic
+  module Connection
+    # Connections manager responsible for starting and managing listeners connections
+    #
+    # In the OSS version it starts listeners as they are without any connection management or
+    # resources utilization supervision and shuts them down or quiets when time has come
+    class Manager
+      def initialize
+        @once_executions = Set.new
+      end
+
+      # Registers provided listeners and starts all of them
+      #
+      # @param listeners [Connection::ListenersBatch]
+      def register(listeners)
+        @listeners = listeners
+        @listeners.each(&:start!)
+      end
+
+      # @return [Boolean] true if all listeners are stopped
+      def done?
+        @listeners.all?(&:stopped?)
+      end
+
+      # Controls the state of listeners upon shutdown and quiet requests
+      # In both cases (quieting and shutdown) we first need to stop processing more work and tell
+      # listeners to become quiet (connected but not yielding messages) and then depending on
+      # whether we want to stop fully or just keep quiet we apply different flow.
+      #
+      # @note It is important to ensure, that all listeners from the same consumer group are always
+      #   all quiet before we can fully shutdown given consumer group. Skipping this can cause
+      #   `Timed out LeaveGroupRequest in flight` and other errors. For the simplification, we just
+      #   quiet all and only then move forward.
+      #
+      # @note This manager works with the assumption, that all listeners are executed on register.
+      def control
+        # Do nothing until shutdown or quiet
+        return unless Karafka::App.done?
+
+        # When we are done processing, immediately quiet all the listeners so they do not pick up
+        # new work to do
+        once(:quiet!) { @listeners.each(&:quiet!) }
+
+        return unless @listeners.all?(&:quiet?)
+
+        # If we are in the process of moving to quiet state, we need to check it.
+        # Switch to quieted status only when all listeners are fully quieted and do nothing after
+        # that until further state changes
+        once(:quieted!) { Karafka::App.quieted! } if Karafka::App.quieting?
+
+        return if Karafka::App.quiet?
+
+        once(:stop!) { @listeners.each(&:stop!) }
+      end
+
+      private
+
+      # Runs code only once and never again
+      # @param args [Object] anything we want to use as a set of unique keys for given execution
+      def once(*args)
+        return if @once_executions.include?(args)
+
+        @once_executions << args
+
+        yield
+      end
+    end
+  end
+end
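The `#once` guard is the core trick in the new manager: `control` may be invoked on every tick of the supervision loop, yet each transition should fire a single time. A standalone sketch of the same idea:

```ruby
require 'set'

executions = Set.new

# Runs the block only the first time it is called with a given set of keys,
# mirroring Manager#once from the diff above
once = lambda do |*args, &block|
  next if executions.include?(args)

  executions << args
  block.call
end

3.times { once.call(:quiet!) { puts 'quieting listeners' } }
# Prints "quieting listeners" exactly once
```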
data/lib/karafka/connection/messages_buffer.rb
CHANGED

@@ -67,6 +67,18 @@ module Karafka
         end
       end
 
+      # Checks if there are any messages from a given topic partition in the buffer
+      # @param topic [String] topic name
+      # @param partition [Integer] partition number
+      # @return [Boolean] true if there is at least one message from this topic partition,
+      #   otherwise false
+      def present?(topic, partition)
+        return false unless @groups.include?(topic)
+        return false unless @groups[topic].include?(partition)
+
+        true
+      end
+
       # @return [Boolean] is the buffer empty or does it contain any messages
       def empty?
         @size.zero?
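`#present?` does a two-step lookup so a missing topic never causes a nil error on the partition check. Assuming `@groups` is a nested `topic => partition => messages` structure (an assumption based on the lookups above), the behavior is:

```ruby
groups = { 'events' => { 0 => %i[msg1 msg2] } }

def present?(groups, topic, partition)
  return false unless groups.include?(topic)
  return false unless groups[topic].include?(partition)

  true
end

present?(groups, 'events', 0) # => true
present?(groups, 'events', 5) # => false
present?(groups, 'other', 0)  # => false
```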
data/lib/karafka/connection/proxy.rb
CHANGED

@@ -68,6 +68,23 @@ module Karafka
         end
       end
 
+      # Similar to `#query_watermark_offsets`.
+      #
+      # @param tpl [Rdkafka::Consumer::TopicPartitionList, nil] tpl or nil for full current
+      #   assignment tpl usage
+      # @return [Rdkafka::Consumer::TopicPartitionList] tpl with committed offsets and metadata
+      def committed(tpl = nil)
+        c_config = @config.committed
+
+        with_broker_errors_retry(
+          # required to be in seconds, not ms
+          wait_time: c_config.wait_time / 1_000.to_f,
+          max_attempts: c_config.max_attempts
+        ) do
+          @wrapped.committed(tpl, c_config.timeout)
+        end
+      end
+
       private
 
       # Runs expected block of code with few retries on all_brokers_down
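`#committed` leans on `with_broker_errors_retry`, which is not part of this hunk. A plausible shape for such a wrapper, shown only as an assumed sketch: retry up to `max_attempts` times, sleeping `wait_time` seconds between tries (note the ms-to-s division mirrored from the diff):

```ruby
def with_broker_errors_retry(wait_time:, max_attempts:)
  attempts = 0

  begin
    attempts += 1
    yield
  rescue StandardError
    # Give up once the attempts budget is exhausted
    raise if attempts >= max_attempts

    sleep(wait_time)
    retry
  end
end

with_broker_errors_retry(wait_time: 1_000 / 1_000.to_f, max_attempts: 3) do
  # a broker call such as @wrapped.committed(tpl, timeout) would go here
end
```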
data/lib/karafka/connection/status.rb
ADDED

@@ -0,0 +1,75 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Namespace for Kafka connection related logic
+  module Connection
+    # Listener connection status representation
+    class Status
+      # Available states and their transitions.
+      STATES = {
+        pending: :pending!,
+        starting: :start!,
+        running: :running!,
+        quieting: :quiet!,
+        quiet: :quieted!,
+        stopping: :stop!,
+        stopped: :stopped!
+      }.freeze
+
+      STATES.each do |state, transition|
+        class_eval <<~RUBY, __FILE__, __LINE__ + 1
+          # Moves status to a different state
+          def #{transition}
+            @mutex.synchronize do
+              # Do not allow reverse state transitions (we always go one way) or transition to the
+              # same state as currently
+              return if @status && STATES.keys.index(:#{state}) <= STATES.keys.index(@status)
+
+              @status = :#{state}
+              @conductor.signal
+            end
+          end
+
+          # @return [Boolean] are we in a given state
+          def #{state}?
+            @status == :#{state}
+          end
+        RUBY
+      end
+
+      def initialize
+        @mutex = Mutex.new
+        @conductor = Karafka::App.config.internal.connection.conductor
+        pending!
+      end
+
+      # If this listener was not even running, will just move it through states until final.
+      # If it was running, will start the stopping procedures.
+      # Will do nothing if it was already stopped
+      def stop!
+        if pending?
+          @status = :stopping
+          stopped!
+        elsif stopped?
+          nil
+        else
+          @status = :stopping
+        end
+      end
+
+      # Moves status back from stopped to pending (and only that). We should not be able to reset
+      # listeners that are not stopped
+      def reset!
+        return unless stopped?
+
+        @status = :pending
+      end
+
+      # @return [Boolean] listener is considered active when it has a client reference that may
+      #   be active and connected to Kafka
+      def active?
+        !pending? && !stopped?
+      end
+    end
+  end
+end
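The guard inside the generated transition methods compares positions in `STATES`, which makes the machine strictly one-way: a transition to an earlier (or the same) state is silently ignored. The comparison logic in isolation:

```ruby
ORDER = %i[pending starting running quieting quiet stopping stopped].freeze

def forward?(current, target)
  return true if current.nil? # first transition always allowed

  ORDER.index(target) > ORDER.index(current)
end

forward?(:running, :stopping) # => true  (moving forward)
forward?(:stopping, :running) # => false (silently ignored)
forward?(:running, :running)  # => false (same-state, ignored)
```

The hand-written `stop!` and `reset!` are the deliberate exceptions: they assign `@status` directly to support the pending-to-stopped shortcut and the stopped-to-pending restart path.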
data/lib/karafka/contracts/config.rb
CHANGED

@@ -51,17 +51,21 @@ module Karafka
         required(:tick_interval) { |val| val.is_a?(Integer) && val >= 1_000 }
 
         nested(:connection) do
-          nested(:proxy) do
-            nested(:query_watermark_offsets) do
-              required(:timeout) { |val| val.is_a?(Integer) && val.positive? }
-              required(:max_attempts) { |val| val.is_a?(Integer) && val.positive? }
-              required(:wait_time) { |val| val.is_a?(Integer) && val.positive? }
-            end
+          required(:manager) { |val| !val.nil? }
+          required(:conductor) { |val| !val.nil? }
 
-            nested(:offsets_for_times) do
-              required(:timeout) { |val| val.is_a?(Integer) && val.positive? }
-              required(:max_attempts) { |val| val.is_a?(Integer) && val.positive? }
-              required(:wait_time) { |val| val.is_a?(Integer) && val.positive? }
+          nested(:proxy) do
+            # All of them have the same requirements
+            %i[
+              query_watermark_offsets
+              offsets_for_times
+              committed
+            ].each do |scope|
+              nested(scope) do
+                required(:timeout) { |val| val.is_a?(Integer) && val.positive? }
+                required(:max_attempts) { |val| val.is_a?(Integer) && val.positive? }
+                required(:wait_time) { |val| val.is_a?(Integer) && val.positive? }
+              end
            end
          end
        end
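For reference, the settings validated above map onto a config tree roughly like the sketch below; the key names come from the contract, while the concrete values are assumptions and not taken from this diff:

```ruby
internal_connection = {
  manager: Karafka::Connection::Manager.new,
  conductor: Karafka::Connection::Conductor.new,
  proxy: {
    query_watermark_offsets: { timeout: 5_000, max_attempts: 3, wait_time: 1_000 },
    offsets_for_times:       { timeout: 5_000, max_attempts: 3, wait_time: 1_000 },
    committed:               { timeout: 5_000, max_attempts: 3, wait_time: 1_000 }
  }
}
```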
data/lib/karafka/contracts/consumer_group.rb
CHANGED

@@ -18,7 +18,7 @@ module Karafka
       virtual do |data, errors|
         next unless errors.empty?
 
-        names = data.fetch(:topics).map { |topic| topic[:name] }
+        names = data.fetch(:topics).map { |topic| topic_unique_key(topic) }
 
         next if names.size == names.uniq.size
 
@@ -51,6 +51,14 @@ module Karafka
 
         [[%i[topics], :topics_namespaced_names_not_unique]]
       end
+
+      class << self
+        # @param topic [Hash] topic config hash
+        # @return [String] topic unique key for validators
+        def topic_unique_key(topic)
+          topic[:name]
+        end
+      end
     end
   end
 end
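Extracting `topic_unique_key` into a class method turns topic uniqueness into an override point. A hypothetical subclass (not from this diff) could, for example, deduplicate on name plus subscription group:

```ruby
class HypotheticalContract < Karafka::Contracts::ConsumerGroup
  class << self
    # Two topics with the same name no longer collide if they live in
    # different subscription groups
    def topic_unique_key(topic)
      [topic[:name], topic.dig(:subscription_group_details, :name)]
    end
  end
end
```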
data/lib/karafka/contracts/topic.rb
CHANGED

@@ -20,7 +20,9 @@ module Karafka
       required(:max_wait_time) { |val| val.is_a?(Integer) && val >= 10 }
       required(:name) { |val| val.is_a?(String) && Contracts::TOPIC_REGEXP.match?(val) }
       required(:active) { |val| [true, false].include?(val) }
-      required(:subscription_group) { |val| val.is_a?(String) && !val.empty? }
+      nested(:subscription_group_details) do
+        required(:name) { |val| val.is_a?(String) && !val.empty? }
+      end
 
       # Consumer needs to be present only if topic is active
       # We allow not to define consumer for non-active because they may be only used via admin
data/lib/karafka/errors.rb
CHANGED
@@ -63,5 +63,18 @@ module Karafka
 
     # Raised when there is an attempt to run an unrecognized CLI command
     UnrecognizedCommandError = Class.new(BaseError)
+
+    # Raised when we attempt to perform operation that is only allowed inside of a transaction and
+    # there is no transaction around us
+    TransactionRequiredError = Class.new(BaseError)
+
+    # Raised in case user would want to perform nested transactions.
+    TransactionAlreadyInitializedError = Class.new(BaseError)
+
+    # Raised in case a listener that was paused is being resumed
+    InvalidListenerResumeError = Class.new(BaseError)
+
+    # Raised when we want to un-pause listener that was not paused
+    InvalidListenerPauseError = Class.new(BaseError)
   end
 end
data/lib/karafka/instrumentation/logger_listener.rb
CHANGED

@@ -247,6 +247,9 @@ module Karafka
       when 'consumer.shutdown.error'
         error "Consumer on shutdown failed due to an error: #{error}"
         error details
+      when 'consumer.tick.error'
+        error "Consumer tick failed due to an error: #{error}"
+        error details
       when 'worker.process.error'
         fatal "Worker processing failed due to an error: #{error}"
         fatal details
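The new `when` branch extends the existing `error.occurred` handling, so failures raised during periodic ticking get logged like other consumer errors. A minimal subscription sketch using the same event type:

```ruby
Karafka.monitor.subscribe('error.occurred') do |event|
  case event[:type]
  when 'consumer.tick.error'
    puts "Tick failed: #{event[:error]}"
  end
end
```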