karafka 2.2.13 → 2.3.0.alpha1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +38 -12
- data/.ruby-version +1 -1
- data/CHANGELOG.md +161 -125
- data/Gemfile.lock +12 -12
- data/README.md +0 -2
- data/SECURITY.md +23 -0
- data/config/locales/errors.yml +7 -1
- data/config/locales/pro_errors.yml +22 -0
- data/docker-compose.yml +3 -1
- data/karafka.gemspec +2 -2
- data/lib/karafka/admin/acl.rb +287 -0
- data/lib/karafka/admin.rb +118 -16
- data/lib/karafka/app.rb +12 -3
- data/lib/karafka/base_consumer.rb +32 -31
- data/lib/karafka/cli/base.rb +1 -1
- data/lib/karafka/connection/client.rb +94 -84
- data/lib/karafka/connection/conductor.rb +28 -0
- data/lib/karafka/connection/listener.rb +165 -46
- data/lib/karafka/connection/listeners_batch.rb +5 -11
- data/lib/karafka/connection/manager.rb +72 -0
- data/lib/karafka/connection/messages_buffer.rb +12 -0
- data/lib/karafka/connection/proxy.rb +17 -0
- data/lib/karafka/connection/status.rb +75 -0
- data/lib/karafka/contracts/config.rb +14 -10
- data/lib/karafka/contracts/consumer_group.rb +9 -1
- data/lib/karafka/contracts/topic.rb +3 -1
- data/lib/karafka/errors.rb +13 -0
- data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
- data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
- data/lib/karafka/instrumentation/logger_listener.rb +3 -9
- data/lib/karafka/instrumentation/notifications.rb +19 -9
- data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +31 -28
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +22 -3
- data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +15 -12
- data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +39 -36
- data/lib/karafka/pro/base_consumer.rb +47 -0
- data/lib/karafka/pro/connection/manager.rb +300 -0
- data/lib/karafka/pro/connection/multiplexing/listener.rb +40 -0
- data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
- data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
- data/lib/karafka/pro/iterator.rb +1 -6
- data/lib/karafka/pro/loader.rb +16 -2
- data/lib/karafka/pro/processing/coordinator.rb +2 -1
- data/lib/karafka/pro/processing/executor.rb +37 -0
- data/lib/karafka/pro/processing/expansions_selector.rb +32 -0
- data/lib/karafka/pro/processing/jobs/periodic.rb +41 -0
- data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +32 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +14 -3
- data/lib/karafka/pro/processing/offset_metadata/consumer.rb +44 -0
- data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +131 -0
- data/lib/karafka/pro/processing/offset_metadata/listener.rb +46 -0
- data/lib/karafka/pro/processing/schedulers/base.rb +143 -0
- data/lib/karafka/pro/processing/schedulers/default.rb +107 -0
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/default.rb +136 -3
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +35 -0
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +1 -1
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/vp/default.rb +60 -26
- data/lib/karafka/pro/processing/virtual_offset_manager.rb +41 -11
- data/lib/karafka/pro/routing/features/long_running_job/topic.rb +2 -0
- data/lib/karafka/pro/routing/features/multiplexing/config.rb +38 -0
- data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +114 -0
- data/lib/karafka/pro/routing/features/multiplexing/patches/contracts/consumer_group.rb +42 -0
- data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +38 -0
- data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +42 -0
- data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +40 -0
- data/lib/karafka/pro/routing/features/multiplexing.rb +59 -0
- data/lib/karafka/pro/routing/features/non_blocking_job/topic.rb +32 -0
- data/lib/karafka/pro/routing/features/non_blocking_job.rb +37 -0
- data/lib/karafka/pro/routing/features/offset_metadata/config.rb +33 -0
- data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +42 -0
- data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +65 -0
- data/lib/karafka/pro/routing/features/offset_metadata.rb +40 -0
- data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +4 -0
- data/lib/karafka/pro/routing/features/patterns/detector.rb +18 -10
- data/lib/karafka/pro/routing/features/periodic_job/config.rb +37 -0
- data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +44 -0
- data/lib/karafka/pro/routing/features/periodic_job/topic.rb +94 -0
- data/lib/karafka/pro/routing/features/periodic_job.rb +27 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +7 -2
- data/lib/karafka/process.rb +5 -3
- data/lib/karafka/processing/coordinator.rb +5 -1
- data/lib/karafka/processing/executor.rb +43 -13
- data/lib/karafka/processing/executors_buffer.rb +22 -7
- data/lib/karafka/processing/jobs/base.rb +19 -2
- data/lib/karafka/processing/jobs/consume.rb +3 -3
- data/lib/karafka/processing/jobs/idle.rb +5 -0
- data/lib/karafka/processing/jobs/revoked.rb +5 -0
- data/lib/karafka/processing/jobs/shutdown.rb +5 -0
- data/lib/karafka/processing/jobs_queue.rb +19 -8
- data/lib/karafka/processing/schedulers/default.rb +42 -0
- data/lib/karafka/processing/strategies/base.rb +13 -4
- data/lib/karafka/processing/strategies/default.rb +23 -7
- data/lib/karafka/processing/strategies/dlq.rb +36 -0
- data/lib/karafka/processing/worker.rb +4 -1
- data/lib/karafka/routing/builder.rb +12 -2
- data/lib/karafka/routing/consumer_group.rb +5 -5
- data/lib/karafka/routing/features/base.rb +44 -8
- data/lib/karafka/routing/features/dead_letter_queue/config.rb +6 -1
- data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
- data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -2
- data/lib/karafka/routing/proxy.rb +4 -3
- data/lib/karafka/routing/subscription_group.rb +2 -2
- data/lib/karafka/routing/subscription_groups_builder.rb +11 -2
- data/lib/karafka/routing/topic.rb +8 -10
- data/lib/karafka/routing/topics.rb +1 -1
- data/lib/karafka/runner.rb +13 -3
- data/lib/karafka/server.rb +5 -9
- data/lib/karafka/setup/config.rb +21 -1
- data/lib/karafka/status.rb +23 -14
- data/lib/karafka/templates/karafka.rb.erb +7 -0
- data/lib/karafka/time_trackers/partition_usage.rb +56 -0
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +47 -13
- metadata.gz.sig +0 -0
- data/lib/karafka/connection/consumer_group_coordinator.rb +0 -48
- data/lib/karafka/pro/performance_tracker.rb +0 -84
- data/lib/karafka/pro/processing/scheduler.rb +0 -74
- data/lib/karafka/processing/scheduler.rb +0 -38
|
@@ -7,6 +7,8 @@ module Karafka
|
|
|
7
7
|
# critical errors by restarting everything in a safe manner.
|
|
8
8
|
#
|
|
9
9
|
# This is the heart of the consumption process.
|
|
10
|
+
#
|
|
11
|
+
# It provides async API for managing, so all status changes are expected to be async.
|
|
10
12
|
class Listener
|
|
11
13
|
include Helpers::Async
|
|
12
14
|
|
|
@@ -14,22 +16,23 @@ module Karafka
|
|
|
14
16
|
# @return [String] id of this listener
|
|
15
17
|
attr_reader :id
|
|
16
18
|
|
|
19
|
+
# @return [Karafka::Routing::SubscriptionGroup] subscription group that this listener handles
|
|
20
|
+
attr_reader :subscription_group
|
|
21
|
+
|
|
17
22
|
# How long to wait in the initial events poll. Increases chances of having the initial events
|
|
18
23
|
# immediately available
|
|
19
24
|
INITIAL_EVENTS_POLL_TIMEOUT = 100
|
|
20
25
|
|
|
21
26
|
private_constant :INITIAL_EVENTS_POLL_TIMEOUT
|
|
22
27
|
|
|
23
|
-
# @param consumer_group_coordinator [Karafka::Connection::ConsumerGroupCoordinator]
|
|
24
28
|
# @param subscription_group [Karafka::Routing::SubscriptionGroup]
|
|
25
29
|
# @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
|
|
26
30
|
# @param scheduler [Karafka::Processing::Scheduler] scheduler we want to use
|
|
27
31
|
# @return [Karafka::Connection::Listener] listener instance
|
|
28
|
-
def initialize(
|
|
32
|
+
def initialize(subscription_group, jobs_queue, scheduler)
|
|
29
33
|
proc_config = ::Karafka::App.config.internal.processing
|
|
30
34
|
|
|
31
35
|
@id = SecureRandom.hex(6)
|
|
32
|
-
@consumer_group_coordinator = consumer_group_coordinator
|
|
33
36
|
@subscription_group = subscription_group
|
|
34
37
|
@jobs_queue = jobs_queue
|
|
35
38
|
@coordinators = Processing::CoordinatorsBuffer.new(subscription_group.topics)
|
|
@@ -43,8 +46,11 @@ module Karafka
|
|
|
43
46
|
# We can do this that way because we always first schedule jobs using messages before we
|
|
44
47
|
# fetch another batch.
|
|
45
48
|
@messages_buffer = MessagesBuffer.new(subscription_group)
|
|
49
|
+
@usage_tracker = TimeTrackers::PartitionUsage.new
|
|
46
50
|
@mutex = Mutex.new
|
|
47
|
-
@
|
|
51
|
+
@status = Status.new
|
|
52
|
+
|
|
53
|
+
@jobs_queue.register(@subscription_group.id)
|
|
48
54
|
end
|
|
49
55
|
|
|
50
56
|
# Runs the main listener fetch loop.
|
|
@@ -60,6 +66,44 @@ module Karafka
|
|
|
60
66
|
)
|
|
61
67
|
|
|
62
68
|
fetch_loop
|
|
69
|
+
|
|
70
|
+
Karafka.monitor.instrument(
|
|
71
|
+
'connection.listener.after_fetch_loop',
|
|
72
|
+
caller: self,
|
|
73
|
+
client: @client,
|
|
74
|
+
subscription_group: @subscription_group
|
|
75
|
+
)
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
# Aliases all statuses operations directly on the listener so we have a listener-facing API
|
|
79
|
+
# Mirrors every status predicate and every status transition on the listener itself, each
# one simply forwarding to the underlying `@status` object
Status::STATES.each_pair do |state_name, transition_name|
  predicate_name = "#{state_name}?"

  # @return [Boolean] is the listener in a given state
  define_method(predicate_name) { @status.public_send(predicate_name) }

  # Moves listener to a given state
  define_method(transition_name) { @status.public_send(transition_name) }
end

# @return [Boolean] whatever the underlying status reports via its own `#active?`
def active?
  @status.active?
end
|
|
95
|
+
|
|
96
|
+
# We overwrite the state `#start` because on start we need to also start running listener in
|
|
97
|
+
# the async thread. While other state transitions happen automatically and status state
|
|
98
|
+
# change is enough, here we need to run the background threads
|
|
99
|
+
def start!
  # When starting again after a stop, the client is reset and the status is moved back via
  # `#reset!` before the regular start transition is applied
  restarting = stopped?
  @client.reset if restarting
  @status.reset! if restarting

  @status.start!
  # Changing the status alone is not enough on start: the listener also needs to be run
  # via `#async_call`
  async_call
end
|
|
64
108
|
|
|
65
109
|
# Stops the jobs queue, triggers shutdown on all the executors (sync), commits offsets and
|
|
@@ -70,13 +114,16 @@ module Karafka
|
|
|
70
114
|
#
|
|
71
115
|
# @note We wrap it with a mutex exactly because of the above case of forceful shutdown
|
|
72
116
|
def shutdown
  @mutex.synchronize do
    if stopped?
      # Already stopped, so there is nothing left to do
      nil
    elsif pending?
      # Nothing to clear if it was not even running
      stopped!
    else
      @executors.clear
      @coordinators.reset
      @client.stop

      stopped!
    end
  end
end
|
|
82
129
|
|
|
@@ -91,6 +138,7 @@ module Karafka
|
|
|
91
138
|
# Kafka connections / Internet connection issues / Etc. Business logic problems should not
|
|
92
139
|
# propagate this far.
|
|
93
140
|
def fetch_loop
|
|
141
|
+
running!
|
|
94
142
|
# Run the initial events fetch to improve chances of having metrics and initial callbacks
|
|
95
143
|
# triggers on start.
|
|
96
144
|
#
|
|
@@ -101,7 +149,7 @@ module Karafka
|
|
|
101
149
|
@client.events_poll(INITIAL_EVENTS_POLL_TIMEOUT)
|
|
102
150
|
|
|
103
151
|
# Run the main loop as long as we are not stopping or moving into quiet mode
|
|
104
|
-
|
|
152
|
+
while running?
|
|
105
153
|
Karafka.monitor.instrument(
|
|
106
154
|
'connection.listener.fetch_loop',
|
|
107
155
|
caller: self,
|
|
@@ -136,7 +184,11 @@ module Karafka
|
|
|
136
184
|
# simplifies the overall design and prevents from race conditions
|
|
137
185
|
wait
|
|
138
186
|
|
|
139
|
-
|
|
187
|
+
build_and_schedule_flow_jobs
|
|
188
|
+
|
|
189
|
+
# periodic jobs never run on topics and partitions that were scheduled, so no risk in
|
|
190
|
+
# having collective wait after both
|
|
191
|
+
build_and_schedule_periodic_jobs if Karafka.pro?
|
|
140
192
|
|
|
141
193
|
wait
|
|
142
194
|
end
|
|
@@ -168,18 +220,11 @@ module Karafka
|
|
|
168
220
|
# Wait until all the shutdown jobs are done
|
|
169
221
|
wait_pinging(wait_until: -> { @jobs_queue.empty?(@subscription_group.id) })
|
|
170
222
|
|
|
171
|
-
|
|
172
|
-
# within this consumer group
|
|
173
|
-
@consumer_group_coordinator.finish_work(id)
|
|
223
|
+
quieted!
|
|
174
224
|
|
|
175
225
|
# Wait if we're in the process of finishing started work or finished all the work and
|
|
176
226
|
# just sitting and being quiet
|
|
177
|
-
wait_pinging(wait_until: -> { !
|
|
178
|
-
|
|
179
|
-
# We need to wait until all the work in the whole consumer group (local to the process)
|
|
180
|
-
# is done. Otherwise we may end up with locks and `Timed out LeaveGroupRequest in flight`
|
|
181
|
-
# warning notifications.
|
|
182
|
-
wait_pinging(wait_until: -> { @consumer_group_coordinator.shutdown? })
|
|
227
|
+
wait_pinging(wait_until: -> { !quiet? })
|
|
183
228
|
|
|
184
229
|
# This extra ping will make sure we've refreshed the rebalance state after other instances
|
|
185
230
|
# potentially shutdown. This will prevent us from closing with a dangling callback
|
|
@@ -198,11 +243,9 @@ module Karafka
|
|
|
198
243
|
type: 'connection.listener.fetch_loop.error'
|
|
199
244
|
)
|
|
200
245
|
|
|
201
|
-
|
|
246
|
+
reset
|
|
202
247
|
|
|
203
248
|
sleep(1) && retry
|
|
204
|
-
ensure
|
|
205
|
-
@consumer_group_coordinator.unlock
|
|
206
249
|
end
|
|
207
250
|
|
|
208
251
|
# Resumes processing of partitions that were paused due to an error.
|
|
@@ -212,6 +255,17 @@ module Karafka
|
|
|
212
255
|
end
|
|
213
256
|
end
|
|
214
257
|
|
|
258
|
+
# Polls messages within the time and amount boundaries defined in the settings and then
|
|
259
|
+
# builds karafka messages based on the raw rdkafka messages buffer returned by the
|
|
260
|
+
# `#batch_poll` method.
|
|
261
|
+
#
|
|
262
|
+
# @note There are two buffers, one for raw messages and one for "built" karafka messages
|
|
263
|
+
def poll_and_remap_messages
  # Whatever `#batch_poll` returns is handed over as-is to the messages buffer for remapping
  polled = @client.batch_poll
  @messages_buffer.remap(polled)
end
|
|
268
|
+
|
|
215
269
|
# Enqueues revoking jobs for partitions that were taken away from the running process.
|
|
216
270
|
def build_and_schedule_revoked_jobs_for_revoked_partitions
|
|
217
271
|
revoked_partitions = @client.rebalance_manager.revoked_partitions
|
|
@@ -223,6 +277,7 @@ module Karafka
|
|
|
223
277
|
|
|
224
278
|
revoked_partitions.each do |topic, partitions|
|
|
225
279
|
partitions.each do |partition|
|
|
280
|
+
@usage_tracker.revoke(topic, partition)
|
|
226
281
|
@coordinators.revoke(topic, partition)
|
|
227
282
|
|
|
228
283
|
# There may be a case where we have lost partition of which data we have never
|
|
@@ -230,7 +285,6 @@ module Karafka
|
|
|
230
285
|
# here. In cases like this, we do not run a revocation job
|
|
231
286
|
@executors.find_all(topic, partition).each do |executor|
|
|
232
287
|
job = @jobs_builder.revoked(executor)
|
|
233
|
-
job.before_enqueue
|
|
234
288
|
jobs << job
|
|
235
289
|
end
|
|
236
290
|
|
|
@@ -243,7 +297,10 @@ module Karafka
|
|
|
243
297
|
end
|
|
244
298
|
end
|
|
245
299
|
|
|
246
|
-
|
|
300
|
+
return if jobs.empty?
|
|
301
|
+
|
|
302
|
+
jobs.each(&:before_schedule)
|
|
303
|
+
@scheduler.on_schedule_revocation(jobs)
|
|
247
304
|
end
|
|
248
305
|
|
|
249
306
|
# Enqueues the shutdown jobs for all the executors that exist in our subscription group
|
|
@@ -252,32 +309,27 @@ module Karafka
|
|
|
252
309
|
|
|
253
310
|
@executors.each do |executor|
|
|
254
311
|
job = @jobs_builder.shutdown(executor)
|
|
255
|
-
job.before_enqueue
|
|
256
312
|
jobs << job
|
|
257
313
|
end
|
|
258
314
|
|
|
259
|
-
|
|
260
|
-
end
|
|
315
|
+
return if jobs.empty?
|
|
261
316
|
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
# `#batch_poll` method.
|
|
265
|
-
#
|
|
266
|
-
# @note There are two buffers, one for raw messages and one for "built" karafka messages
|
|
267
|
-
def poll_and_remap_messages
|
|
268
|
-
@messages_buffer.remap(
|
|
269
|
-
@client.batch_poll
|
|
270
|
-
)
|
|
317
|
+
jobs.each(&:before_schedule)
|
|
318
|
+
@scheduler.on_schedule_shutdown(jobs)
|
|
271
319
|
end
|
|
272
320
|
|
|
273
321
|
# Takes the messages per topic partition and enqueues processing jobs in threads using
|
|
274
|
-
# given scheduler.
|
|
275
|
-
|
|
322
|
+
# given scheduler. It also handles the idle jobs when filtering API removed all messages
|
|
323
|
+
# and we need to run house-keeping
|
|
324
|
+
def build_and_schedule_flow_jobs
|
|
276
325
|
return if @messages_buffer.empty?
|
|
277
326
|
|
|
278
|
-
|
|
327
|
+
consume_jobs = []
|
|
328
|
+
idle_jobs = []
|
|
279
329
|
|
|
280
330
|
@messages_buffer.each do |topic, partition, messages|
|
|
331
|
+
@usage_tracker.track(topic, partition)
|
|
332
|
+
|
|
281
333
|
coordinator = @coordinators.find_or_create(topic, partition)
|
|
282
334
|
# Start work coordination for this topic partition
|
|
283
335
|
coordinator.start(messages)
|
|
@@ -286,26 +338,93 @@ module Karafka
|
|
|
286
338
|
# and it will not go through a standard lifecycle. Same applies to revoked and shutdown
|
|
287
339
|
if messages.empty?
|
|
288
340
|
executor = @executors.find_or_create(topic, partition, 0, coordinator)
|
|
289
|
-
|
|
341
|
+
idle_jobs << @jobs_builder.idle(executor)
|
|
290
342
|
else
|
|
291
343
|
@partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
|
|
292
344
|
executor = @executors.find_or_create(topic, partition, group_id, coordinator)
|
|
293
345
|
coordinator.increment
|
|
294
|
-
|
|
346
|
+
consume_jobs << @jobs_builder.consume(executor, partition_messages)
|
|
347
|
+
end
|
|
348
|
+
end
|
|
349
|
+
end
|
|
350
|
+
|
|
351
|
+
# We schedule the idle jobs before running the `#before_schedule` on the consume jobs so
|
|
352
|
+
# workers can already pick up the idle jobs while the `#before_schedule` on consumption
|
|
353
|
+
# jobs runs
|
|
354
|
+
unless idle_jobs.empty?
|
|
355
|
+
idle_jobs.each(&:before_schedule)
|
|
356
|
+
@scheduler.on_schedule_idle(idle_jobs)
|
|
357
|
+
end
|
|
358
|
+
|
|
359
|
+
unless consume_jobs.empty?
|
|
360
|
+
consume_jobs.each(&:before_schedule)
|
|
361
|
+
@scheduler.on_schedule_consumption(consume_jobs)
|
|
362
|
+
end
|
|
363
|
+
end
|
|
364
|
+
|
|
365
|
+
# Builds and schedules periodic jobs for topics partitions for which no messages were
|
|
366
|
+
# received recently. In case `Idle` job is invoked, we do not run periodic. Idle means that
|
|
367
|
+
# a complex flow kicked in and it was a user choice not to run consumption but messages were
|
|
368
|
+
# shipped.
|
|
369
|
+
def build_and_schedule_periodic_jobs
  # Number of topics in this subscription group that use periodic jobs. Computed once and
  # reused, so when none of them do, every later call finishes right away
  @periodic_jobs ||= @subscription_group.topics.count(&:periodic_job?)
  return if @periodic_jobs.zero?

  periodic_jobs = []

  Karafka::App.assignments.each do |topic, partitions|
    # Only assignments that belong to our subscription group and have periodic jobs
    # enabled are of interest here
    next unless topic.subscription_group == @subscription_group && topic.periodic_job?

    name = topic.name
    tick_interval = topic.periodic_job.interval

    partitions.each do |partition|
      # Skip if we were operating on a given topic partition recently
      next if @usage_tracker.active?(name, partition, tick_interval)

      coordinator = @coordinators.find_or_create(name, partition)

      # Do not tick if we do not want to tick during pauses
      next if coordinator.paused? && !topic.periodic_job.during_pause?

      # Do not tick during retry flows unless this was requested. A positive attempt is
      # treated as "there was an error and we will retry"
      next if coordinator.attempt.positive? && !topic.periodic_job.during_retry?

      # Track so we do not run periodic job again too soon
      @usage_tracker.track(name, partition)

      @executors.find_all_or_create(name, partition, coordinator).each do |executor|
        periodic_jobs << @jobs_builder.periodic(executor)
      end
    end
  end

  unless periodic_jobs.empty?
    periodic_jobs.each(&:before_schedule)
    @scheduler.on_schedule_periodic(periodic_jobs)
  end
end
|
|
303
422
|
|
|
304
423
|
# Waits for all the jobs from a given subscription group to finish before moving forward
|
|
305
424
|
def wait
  group_id = @subscription_group.id

  # The block handed to the queue runs the events poller and then lets the scheduler
  # manage its work
  @jobs_queue.wait(group_id) do
    @events_poller.call
    @scheduler.on_manage
  end
end
|
|
311
430
|
|
|
@@ -322,7 +441,7 @@ module Karafka
|
|
|
322
441
|
def wait_pinging(wait_until:, after_ping: -> {})
|
|
323
442
|
until wait_until.call
|
|
324
443
|
@client.ping
|
|
325
|
-
@scheduler.
|
|
444
|
+
@scheduler.on_manage
|
|
326
445
|
|
|
327
446
|
after_ping.call
|
|
328
447
|
sleep(0.2)
|
|
@@ -333,13 +452,13 @@ module Karafka
|
|
|
333
452
|
# `#fetch_loop` again. We just need to remember to also reset the runner as it is a long
|
|
334
453
|
# running one, so with a new connection to Kafka, we need to initialize the state of the
|
|
335
454
|
# runner and underlying consumers once again.
|
|
336
|
-
def
|
|
455
|
+
def reset
|
|
337
456
|
# If there was any problem with processing, before we reset things we need to make sure,
|
|
338
457
|
# there are no jobs in the queue. Otherwise it could lead to leakage in between client
|
|
339
458
|
# resetting.
|
|
340
459
|
@jobs_queue.wait(@subscription_group.id)
|
|
341
460
|
@jobs_queue.clear(@subscription_group.id)
|
|
342
|
-
@scheduler.
|
|
461
|
+
@scheduler.on_clear(@subscription_group.id)
|
|
343
462
|
@events_poller.reset
|
|
344
463
|
@client.reset
|
|
345
464
|
@coordinators.reset
|
|
@@ -6,8 +6,6 @@ module Karafka
|
|
|
6
6
|
class ListenersBatch
|
|
7
7
|
include Enumerable
|
|
8
8
|
|
|
9
|
-
attr_reader :coordinators
|
|
10
|
-
|
|
11
9
|
# @param jobs_queue [JobsQueue]
|
|
12
10
|
# @return [ListenersBatch]
|
|
13
11
|
def initialize(jobs_queue)
|
|
@@ -15,18 +13,9 @@ module Karafka
|
|
|
15
13
|
# should be able to distribute work whenever any work is done in any of the listeners
|
|
16
14
|
scheduler = App.config.internal.processing.scheduler_class.new(jobs_queue)
|
|
17
15
|
|
|
18
|
-
@coordinators = []
|
|
19
|
-
|
|
20
16
|
@batch = App.subscription_groups.flat_map do |_consumer_group, subscription_groups|
|
|
21
|
-
consumer_group_coordinator = Connection::ConsumerGroupCoordinator.new(
|
|
22
|
-
subscription_groups.size
|
|
23
|
-
)
|
|
24
|
-
|
|
25
|
-
@coordinators << consumer_group_coordinator
|
|
26
|
-
|
|
27
17
|
subscription_groups.map do |subscription_group|
|
|
28
18
|
Connection::Listener.new(
|
|
29
|
-
consumer_group_coordinator,
|
|
30
19
|
subscription_group,
|
|
31
20
|
jobs_queue,
|
|
32
21
|
scheduler
|
|
@@ -40,6 +29,11 @@ module Karafka
|
|
|
40
29
|
def each(&block)
  # Without a block we hand back the enumerator of the underlying batch
  return @batch.each unless block

  @batch.each(&block)
end
|
|
32
|
+
|
|
33
|
+
# @return [Array<Listener>] active listeners
|
|
34
|
+
def active
  # Keeps only those listeners that report themselves as active
  select { |listener| listener.active? }
end
|
|
43
37
|
end
|
|
44
38
|
end
|
|
45
39
|
end
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
  # Namespace for Kafka connection related logic
  module Connection
    # Connections manager responsible for starting listeners and for driving them through the
    # quiet and stop transitions once the application is done.
    #
    # It starts all listeners as they are on registration and does not do any further
    # connection management beyond quieting and stopping them.
    class Manager
      def initialize
        # Keys of the one-time actions that were already executed
        @once_executions = Set.new
      end

      # Remembers provided listeners and starts each of them
      #
      # @param listeners [Connection::ListenersBatch]
      def register(listeners)
        @listeners = listeners
        @listeners.each { |listener| listener.start! }
      end

      # @return [Boolean] true if all listeners are stopped
      def done?
        @listeners.all? { |listener| listener.stopped? }
      end

      # Controls the state of listeners upon shutdown and quiet requests.
      #
      # In both cases we first ask all listeners to become quiet. Only when every one of them
      # is quiet do we either stay quiet or ask them to stop.
      #
      # @note All listeners are quieted before any of them is asked to stop. Per the original
      #   notes, skipping this can cause `Timed out LeaveGroupRequest in flight` and other
      #   errors.
      #
      # @note This manager works with the assumption that all listeners are started on register.
      def control
        app = Karafka::App

        # Do nothing until shutdown or quiet
        return unless app.done?

        # Ask all the listeners to become quiet, but only the first time we get here
        once(:quiet!) { @listeners.each { |listener| listener.quiet! } }

        return unless @listeners.all? { |listener| listener.quiet? }

        # Everyone is quiet, so an application that is quieting can be marked as quieted. This
        # also happens only once
        once(:quieted!) { app.quieted! } if app.quieting?

        # A quiet application stays as it is, otherwise we ask the listeners to stop (once)
        once(:stop!) { @listeners.each { |listener| listener.stop! } } unless app.quiet?
      end

      private

      # Runs the block only on the first call with a given set of arguments
      #
      # @param args [Object] anything we want to use as a set of unique keys for given execution
      def once(*args)
        # `Set#add?` registers the key and returns nil when it was already there
        yield if @once_executions.add?(args)
      end
    end
  end
end
|
|
@@ -67,6 +67,18 @@ module Karafka
|
|
|
67
67
|
end
|
|
68
68
|
end
|
|
69
69
|
|
|
70
|
+
# Checks if there are any messages from a given topic partition in the buffer
|
|
71
|
+
# @param topic [String] topic name
|
|
72
|
+
# @param partition [Integer] partition number
|
|
73
|
+
# @return [Boolean] true if there is at least one message from this topic partition,
|
|
74
|
+
# otherwise false
|
|
75
|
+
def present?(topic, partition)
  # Topic level is checked first, the partition level is consulted only for a known topic
  @groups.include?(topic) && @groups[topic].include?(partition)
end
|
|
81
|
+
|
|
70
82
|
# @return [Boolean] is the buffer empty or does it contain any messages
|
|
71
83
|
def empty?
|
|
72
84
|
@size.zero?
|
|
@@ -68,6 +68,23 @@ module Karafka
|
|
|
68
68
|
end
|
|
69
69
|
end
|
|
70
70
|
|
|
71
|
+
# Similar to `#query_watermark_offsets`.
|
|
72
|
+
#
|
|
73
|
+
# @param tpl [Rdkafka::Consumer::TopicPartitionList, nil] tpl or nil for full current
|
|
74
|
+
# assignment tpl usage
|
|
75
|
+
# @return [Rdkafka::Consumer::TopicPartitionList] tpl with committed offsets and metadata
|
|
76
|
+
def committed(tpl = nil)
  settings = @config.committed

  retry_options = {
    # required to be in seconds, not ms
    wait_time: settings.wait_time / 1_000.to_f,
    max_attempts: settings.max_attempts
  }

  # The wrapped call runs inside the retrying helper with the options built above
  with_broker_errors_retry(**retry_options) do
    @wrapped.committed(tpl, settings.timeout)
  end
end
|
|
87
|
+
|
|
71
88
|
private
|
|
72
89
|
|
|
73
90
|
# Runs expected block of code with few retries on all_brokers_down
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
  # Namespace for Kafka connection related logic
  module Connection
    # Listener connection status representation.
    #
    # Holds a single state out of `STATES` and exposes one predicate (`#running?`, ...) and one
    # transition (`#running!`, ...) per state. Each effective state change is done under a mutex
    # and is followed by a signal sent to the conductor taken from the app configuration.
    class Status
      # Available states and their transitions. The order matters: generated transitions only
      # ever move forward in this list.
      STATES = {
        pending: :pending!,
        starting: :start!,
        running: :running!,
        quieting: :quiet!,
        quiet: :quieted!,
        stopping: :stop!,
        stopped: :stopped!
      }.freeze

      STATES.each do |state, transition|
        class_eval <<~RUBY, __FILE__, __LINE__ + 1
          # Moves status to a different state
          def #{transition}
            @mutex.synchronize do
              # Do not allow reverse state transitions (we always go one way) or transition to the
              # same state as currently
              return if @status && STATES.keys.index(:#{state}) <= STATES.keys.index(@status)

              @status = :#{state}
              @conductor.signal
            end
          end

          # @return [Boolean] are we in a given state
          def #{state}?
            @status == :#{state}
          end
        RUBY
      end

      def initialize
        @mutex = Mutex.new
        @conductor = Karafka::App.config.internal.connection.conductor
        pending!
      end

      # If this listener was not even running, will just move it through states until final.
      # If it was running, will start the stopping procedures.
      # Will do nothing if it was already stopping or stopped.
      #
      # Overrides the generated `#stop!` because stopping is allowed from any state, including
      # pending. Same as the generated transitions, it changes the state under the mutex and
      # signals the conductor when the state has actually changed.
      def stop!
        was_pending = @mutex.synchronize do
          # Nothing changes, so there is nothing to signal
          next false if @status == :stopping || @status == :stopped

          previous = @status
          @status = :stopping
          @conductor.signal

          previous == :pending
        end

        # Done outside of the block above because `#stopped!` takes the same mutex and a Ruby
        # mutex cannot be locked twice by the same thread
        stopped! if was_pending
      end

      # Moves status back from stopped to pending (and only that). We should not be able to reset
      # listeners that are not stopped
      #
      # @return [Symbol, nil] `:pending` when the status was reset, nil when it was not stopped
      def reset!
        @mutex.synchronize do
          next unless @status == :stopped

          @status = :pending
        end
      end

      # @return [Boolean] true for every state other than pending and stopped
      def active?
        !pending? && !stopped?
      end
    end
  end
end
|
|
@@ -51,17 +51,21 @@ module Karafka
|
|
|
51
51
|
required(:tick_interval) { |val| val.is_a?(Integer) && val >= 1_000 }
|
|
52
52
|
|
|
53
53
|
nested(:connection) do
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
required(:timeout) { |val| val.is_a?(Integer) && val.positive? }
|
|
57
|
-
required(:max_attempts) { |val| val.is_a?(Integer) && val.positive? }
|
|
58
|
-
required(:wait_time) { |val| val.is_a?(Integer) && val.positive? }
|
|
59
|
-
end
|
|
54
|
+
required(:manager) { |val| !val.nil? }
|
|
55
|
+
required(:conductor) { |val| !val.nil? }
|
|
60
56
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
57
|
+
nested(:proxy) do
|
|
58
|
+
# All of them have the same requirements
|
|
59
|
+
%i[
|
|
60
|
+
query_watermark_offsets
|
|
61
|
+
offsets_for_times
|
|
62
|
+
committed
|
|
63
|
+
].each do |scope|
|
|
64
|
+
nested(scope) do
|
|
65
|
+
required(:timeout) { |val| val.is_a?(Integer) && val.positive? }
|
|
66
|
+
required(:max_attempts) { |val| val.is_a?(Integer) && val.positive? }
|
|
67
|
+
required(:wait_time) { |val| val.is_a?(Integer) && val.positive? }
|
|
68
|
+
end
|
|
65
69
|
end
|
|
66
70
|
end
|
|
67
71
|
end
|
|
@@ -18,7 +18,7 @@ module Karafka
|
|
|
18
18
|
virtual do |data, errors|
|
|
19
19
|
next unless errors.empty?
|
|
20
20
|
|
|
21
|
-
names = data.fetch(:topics).map { |topic| topic
|
|
21
|
+
names = data.fetch(:topics).map { |topic| topic_unique_key(topic) }
|
|
22
22
|
|
|
23
23
|
next if names.size == names.uniq.size
|
|
24
24
|
|
|
@@ -51,6 +51,14 @@ module Karafka
|
|
|
51
51
|
|
|
52
52
|
[[%i[topics], :topics_namespaced_names_not_unique]]
|
|
53
53
|
end
|
|
54
|
+
|
|
55
|
+
# @param topic [Hash] topic config hash
# @return [String] topic unique key for validators, taken from the `:name` key
def self.topic_unique_key(topic)
  topic[:name]
end
|
|
54
62
|
end
|
|
55
63
|
end
|
|
56
64
|
end
|
|
@@ -20,7 +20,9 @@ module Karafka
|
|
|
20
20
|
required(:max_wait_time) { |val| val.is_a?(Integer) && val >= 10 }
|
|
21
21
|
required(:name) { |val| val.is_a?(String) && Contracts::TOPIC_REGEXP.match?(val) }
|
|
22
22
|
required(:active) { |val| [true, false].include?(val) }
|
|
23
|
-
|
|
23
|
+
nested(:subscription_group_details) do
|
|
24
|
+
required(:name) { |val| val.is_a?(String) && !val.empty? }
|
|
25
|
+
end
|
|
24
26
|
|
|
25
27
|
# Consumer needs to be present only if topic is active
|
|
26
28
|
# We allow not to define consumer for non-active because they may be only used via admin
|