karafka 1.4.13 → 2.0.0
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +3 -3
- data/.github/workflows/ci.yml +85 -30
- data/.ruby-version +1 -1
- data/CHANGELOG.md +268 -7
- data/CONTRIBUTING.md +10 -19
- data/Gemfile +6 -0
- data/Gemfile.lock +44 -87
- data/LICENSE +17 -0
- data/LICENSE-COMM +89 -0
- data/LICENSE-LGPL +165 -0
- data/README.md +44 -48
- data/bin/benchmarks +85 -0
- data/bin/create_token +22 -0
- data/bin/integrations +237 -0
- data/bin/karafka +4 -0
- data/bin/scenario +29 -0
- data/bin/stress_many +13 -0
- data/bin/stress_one +13 -0
- data/bin/wait_for_kafka +20 -0
- data/certs/karafka-pro.pem +11 -0
- data/config/errors.yml +55 -40
- data/docker-compose.yml +39 -3
- data/karafka.gemspec +11 -17
- data/lib/active_job/karafka.rb +21 -0
- data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
- data/lib/karafka/active_job/consumer.rb +26 -0
- data/lib/karafka/active_job/dispatcher.rb +38 -0
- data/lib/karafka/active_job/job_extensions.rb +34 -0
- data/lib/karafka/active_job/job_options_contract.rb +21 -0
- data/lib/karafka/active_job/routing/extensions.rb +31 -0
- data/lib/karafka/app.rb +15 -20
- data/lib/karafka/base_consumer.rb +181 -31
- data/lib/karafka/cli/base.rb +4 -4
- data/lib/karafka/cli/info.rb +43 -9
- data/lib/karafka/cli/install.rb +19 -10
- data/lib/karafka/cli/server.rb +17 -42
- data/lib/karafka/cli.rb +4 -11
- data/lib/karafka/connection/client.rb +385 -90
- data/lib/karafka/connection/listener.rb +246 -38
- data/lib/karafka/connection/listeners_batch.rb +24 -0
- data/lib/karafka/connection/messages_buffer.rb +84 -0
- data/lib/karafka/connection/pauses_manager.rb +46 -0
- data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
- data/lib/karafka/connection/rebalance_manager.rb +78 -0
- data/lib/karafka/contracts/base.rb +17 -0
- data/lib/karafka/contracts/config.rb +88 -11
- data/lib/karafka/contracts/consumer_group.rb +21 -189
- data/lib/karafka/contracts/consumer_group_topic.rb +34 -11
- data/lib/karafka/contracts/server_cli_options.rb +19 -18
- data/lib/karafka/contracts.rb +1 -1
- data/lib/karafka/env.rb +46 -0
- data/lib/karafka/errors.rb +21 -21
- data/lib/karafka/helpers/async.rb +33 -0
- data/lib/karafka/helpers/colorize.rb +20 -0
- data/lib/karafka/helpers/multi_delegator.rb +2 -2
- data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
- data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
- data/lib/karafka/instrumentation/logger_listener.rb +164 -0
- data/lib/karafka/instrumentation/monitor.rb +13 -61
- data/lib/karafka/instrumentation/notifications.rb +52 -0
- data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
- data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
- data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
- data/lib/karafka/instrumentation.rb +21 -0
- data/lib/karafka/licenser.rb +75 -0
- data/lib/karafka/messages/batch_metadata.rb +45 -0
- data/lib/karafka/messages/builders/batch_metadata.rb +40 -0
- data/lib/karafka/messages/builders/message.rb +39 -0
- data/lib/karafka/messages/builders/messages.rb +32 -0
- data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
- data/lib/karafka/messages/messages.rb +64 -0
- data/lib/karafka/{params → messages}/metadata.rb +4 -6
- data/lib/karafka/messages/seek.rb +9 -0
- data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
- data/lib/karafka/pro/active_job/consumer.rb +46 -0
- data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
- data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
- data/lib/karafka/pro/base_consumer.rb +82 -0
- data/lib/karafka/pro/contracts/base.rb +21 -0
- data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
- data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
- data/lib/karafka/pro/loader.rb +76 -0
- data/lib/karafka/pro/performance_tracker.rb +80 -0
- data/lib/karafka/pro/processing/coordinator.rb +72 -0
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
- data/lib/karafka/pro/processing/partitioner.rb +60 -0
- data/lib/karafka/pro/processing/scheduler.rb +56 -0
- data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
- data/lib/karafka/pro/routing/topic_extensions.rb +38 -0
- data/lib/karafka/pro.rb +13 -0
- data/lib/karafka/process.rb +1 -0
- data/lib/karafka/processing/coordinator.rb +88 -0
- data/lib/karafka/processing/coordinators_buffer.rb +54 -0
- data/lib/karafka/processing/executor.rb +118 -0
- data/lib/karafka/processing/executors_buffer.rb +88 -0
- data/lib/karafka/processing/jobs/base.rb +51 -0
- data/lib/karafka/processing/jobs/consume.rb +42 -0
- data/lib/karafka/processing/jobs/revoked.rb +22 -0
- data/lib/karafka/processing/jobs/shutdown.rb +23 -0
- data/lib/karafka/processing/jobs_builder.rb +29 -0
- data/lib/karafka/processing/jobs_queue.rb +144 -0
- data/lib/karafka/processing/partitioner.rb +22 -0
- data/lib/karafka/processing/result.rb +29 -0
- data/lib/karafka/processing/scheduler.rb +22 -0
- data/lib/karafka/processing/worker.rb +88 -0
- data/lib/karafka/processing/workers_batch.rb +27 -0
- data/lib/karafka/railtie.rb +113 -0
- data/lib/karafka/routing/builder.rb +15 -24
- data/lib/karafka/routing/consumer_group.rb +11 -19
- data/lib/karafka/routing/consumer_mapper.rb +1 -2
- data/lib/karafka/routing/router.rb +1 -1
- data/lib/karafka/routing/subscription_group.rb +53 -0
- data/lib/karafka/routing/subscription_groups_builder.rb +53 -0
- data/lib/karafka/routing/topic.rb +61 -24
- data/lib/karafka/routing/topics.rb +38 -0
- data/lib/karafka/runner.rb +51 -0
- data/lib/karafka/serialization/json/deserializer.rb +6 -15
- data/lib/karafka/server.rb +67 -26
- data/lib/karafka/setup/config.rb +147 -175
- data/lib/karafka/status.rb +14 -5
- data/lib/karafka/templates/example_consumer.rb.erb +16 -0
- data/lib/karafka/templates/karafka.rb.erb +15 -51
- data/lib/karafka/time_trackers/base.rb +19 -0
- data/lib/karafka/time_trackers/pause.rb +92 -0
- data/lib/karafka/time_trackers/poll.rb +65 -0
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +38 -17
- data.tar.gz.sig +0 -0
- metadata +118 -120
- metadata.gz.sig +0 -0
- data/MIT-LICENCE +0 -18
- data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
- data/lib/karafka/attributes_map.rb +0 -63
- data/lib/karafka/backends/inline.rb +0 -16
- data/lib/karafka/base_responder.rb +0 -226
- data/lib/karafka/cli/flow.rb +0 -48
- data/lib/karafka/cli/missingno.rb +0 -19
- data/lib/karafka/code_reloader.rb +0 -67
- data/lib/karafka/connection/api_adapter.rb +0 -158
- data/lib/karafka/connection/batch_delegator.rb +0 -55
- data/lib/karafka/connection/builder.rb +0 -23
- data/lib/karafka/connection/message_delegator.rb +0 -36
- data/lib/karafka/consumers/batch_metadata.rb +0 -10
- data/lib/karafka/consumers/callbacks.rb +0 -71
- data/lib/karafka/consumers/includer.rb +0 -64
- data/lib/karafka/consumers/responders.rb +0 -24
- data/lib/karafka/consumers/single_params.rb +0 -15
- data/lib/karafka/contracts/responder_usage.rb +0 -54
- data/lib/karafka/fetcher.rb +0 -42
- data/lib/karafka/helpers/class_matcher.rb +0 -88
- data/lib/karafka/helpers/config_retriever.rb +0 -46
- data/lib/karafka/helpers/inflector.rb +0 -26
- data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
- data/lib/karafka/params/batch_metadata.rb +0 -26
- data/lib/karafka/params/builders/batch_metadata.rb +0 -30
- data/lib/karafka/params/builders/params.rb +0 -38
- data/lib/karafka/params/builders/params_batch.rb +0 -25
- data/lib/karafka/params/params_batch.rb +0 -60
- data/lib/karafka/patches/ruby_kafka.rb +0 -47
- data/lib/karafka/persistence/client.rb +0 -29
- data/lib/karafka/persistence/consumers.rb +0 -45
- data/lib/karafka/persistence/topics.rb +0 -48
- data/lib/karafka/responders/builder.rb +0 -36
- data/lib/karafka/responders/topic.rb +0 -55
- data/lib/karafka/routing/topic_mapper.rb +0 -53
- data/lib/karafka/serialization/json/serializer.rb +0 -31
- data/lib/karafka/setup/configurators/water_drop.rb +0 -36
- data/lib/karafka/templates/application_responder.rb.erb +0 -11
data/lib/karafka/connection/listener.rb
@@ -2,69 +2,277 @@
 
 module Karafka
   module Connection
-    # A single listener that listens to incoming messages from a single
-    #
-    #
-    #
+    # A single listener that listens to incoming messages from a single subscription group.
+    # It polls the messages and then enqueues jobs. It also takes care of potential recovery from
+    # critical errors by restarting everything in a safe manner.
+    #
+    # This is the heart of the consumption process.
     class Listener
-
-
+      include Helpers::Async
+
+      # Can be useful for logging
+      # @return [String] id of this listener
+      attr_reader :id
+
+      # @param subscription_group [Karafka::Routing::SubscriptionGroup]
+      # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
       # @return [Karafka::Connection::Listener] listener instance
-      def initialize(
-
+      def initialize(subscription_group, jobs_queue)
+        proc_config = ::Karafka::App.config.internal.processing
+
+        @id = SecureRandom.uuid
+        @subscription_group = subscription_group
+        @jobs_queue = jobs_queue
+        @coordinators = Processing::CoordinatorsBuffer.new
+        @client = Client.new(@subscription_group)
+        @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
+        @jobs_builder = proc_config.jobs_builder
+        @partitioner = proc_config.partitioner_class.new(subscription_group)
+        # We reference scheduler here as it is much faster than fetching this each time
+        @scheduler = proc_config.scheduler
+        # We keep one buffer for messages to preserve memory and not allocate extra objects
+        # We can do this that way because we always first schedule jobs using messages before we
+        # fetch another batch.
+        @messages_buffer = MessagesBuffer.new(subscription_group)
+        @mutex = Mutex.new
+        @stopped = false
       end
 
-      # Runs
+      # Runs the main listener fetch loop.
+      #
+      # @note Prefetch callbacks can be used to seek offset or do other things before we actually
+      #   start consuming data
       def call
         Karafka.monitor.instrument(
           'connection.listener.before_fetch_loop',
-
-
+          caller: self,
+          subscription_group: @subscription_group,
+          client: @client
         )
+
         fetch_loop
       end
 
+      # Stops the jobs queue, triggers shutdown on all the executors (sync), commits offsets and
+      # stops kafka client.
+      #
+      # @note This method is not private despite being part of the fetch loop because in case of
+      #   a forceful shutdown, it may be invoked from a separate thread
+      #
+      # @note We wrap it with a mutex exactly because of the above case of forceful shutdown
+      def shutdown
+        return if @stopped
+
+        @mutex.synchronize do
+          @stopped = true
+          @executors.clear
+          @coordinators.reset
+          @client.commit_offsets!
+          @client.stop
+        end
+      end
+
       private
 
-      #
+      # Fetches the data and adds it to the jobs queue.
+      #
       # @note We catch all the errors here, so they don't affect other listeners (or this one)
       #   so we will be able to listen and consume other incoming messages.
-      #   Since it is run inside Karafka::Connection::
-      #   won't crash the whole
+      #   Since it is run inside Karafka::Connection::Runner thread - catching all the exceptions
+      #   won't crash the whole process. Here we mostly focus on catching the exceptions related to
      #   Kafka connections / Internet connection issues / Etc. Business logic problems should not
-      #   propagate this far
+      #   propagate this far.
       def fetch_loop
-
-
-
-
-
-
-
-
-
-
+        until Karafka::App.stopping?
+          Karafka.monitor.instrument(
+            'connection.listener.fetch_loop',
+            caller: self,
+            client: @client
+          )
+
+          resume_paused_partitions
+
+          Karafka.monitor.instrument(
+            'connection.listener.fetch_loop.received',
+            caller: self,
+            messages_buffer: @messages_buffer
+          ) do
+            # We need to fetch data before we revoke lost partitions details as during the polling
+            # the callbacks for tracking lost partitions are triggered. Otherwise we would be
+            # always one batch behind.
+            poll_and_remap_messages
           end
+
+          # If there were revoked partitions, we need to wait on their jobs to finish before
+          # distributing consuming jobs as upon revoking, we might get assigned to the same
+          # partitions, thus getting their jobs. The revoking jobs need to finish before
+          # appropriate consumers are taken down and re-created
+          build_and_schedule_revoke_lost_partitions_jobs
+
+          # We wait only on jobs from our subscription group. Other groups are independent.
+          # This will block on revoked jobs until they are finished. Those are not meant to last
+          # long and should not have any bigger impact on the system. Doing this in a blocking way
+          # simplifies the overall design and prevents from race conditions
+          wait
+
+          build_and_schedule_consumption_jobs
+
+          wait
+
+          # We don't use the `#commit_offsets!` here for performance reasons. This can be achieved
+          # if needed by using manual offset management.
+          @client.commit_offsets
         end
+
+        # If we are stopping we will no longer schedule any jobs despite polling.
+        # We need to keep polling not to exceed the `max.poll.interval` for long-running
+        # non-blocking jobs and we need to allow them to finish. We however do not want to
+        # enqueue any new jobs. It's worth keeping in mind that it is the end user responsibility
+        # to detect shutdown in their long-running logic or else Karafka will force shutdown
+        # after a while.
+        #
+        # We do not care about resuming any partitions or lost jobs as we do not plan to do
+        # anything with them as we're in the shutdown phase.
+        wait_with_poll
+
+        # We do not want to schedule the shutdown jobs prior to finishing all the jobs
+        # (including non-blocking) as there might be a long-running job with a shutdown and then
+        # we would run two jobs in parallel for the same executor and consumer. We do not want that
+        # as it could create a race-condition.
+        build_and_schedule_shutdown_jobs
+
+        wait_with_poll
+
+        shutdown
+
       # This is on purpose - see the notes for this method
       # rubocop:disable Lint/RescueException
       rescue Exception => e
-        Karafka.monitor.instrument('connection.listener.fetch_loop.error', caller: self, error: e)
         # rubocop:enable Lint/RescueException
-
-
-
-
-
-
-
-
+        Karafka.monitor.instrument(
+          'error.occurred',
+          caller: self,
+          error: e,
+          type: 'connection.listener.fetch_loop.error'
+        )
+
+        restart
+
+        sleep(1) && retry
+      end
+
+      # Resumes processing of partitions that were paused due to an error.
+      def resume_paused_partitions
+        @coordinators.resume do |topic, partition|
+          @client.resume(topic, partition)
+        end
+      end
+
+      # Enqueues revoking jobs for partitions that were taken away from the running process.
+      def build_and_schedule_revoke_lost_partitions_jobs
+        revoked_partitions = @client.rebalance_manager.revoked_partitions
+
+        # Stop early to save on some execution and array allocation
+        return if revoked_partitions.empty?
+
+        jobs = []
+
+        revoked_partitions.each do |topic, partitions|
+          partitions.each do |partition|
+            @coordinators.revoke(topic, partition)
+
+            # There may be a case where we have lost partition of which data we have never
+            # processed (if it was assigned and revoked really fast), thus we may not have it
+            # here. In cases like this, we do not run a revocation job
+            @executors.find_all(topic, partition).each do |executor|
+              jobs << @jobs_builder.revoked(executor)
+            end
+
+            # We need to remove all the executors of a given topic partition that we have lost, so
+            # next time we pick up it's work, new executors kick in. This may be needed especially
+            # for LRJ where we could end up with a race condition
+            # This revocation needs to happen after the jobs are scheduled, otherwise they would
+            # be scheduled with new executors instead of old
+            @executors.revoke(topic, partition)
+          end
+        end
+
+        @scheduler.schedule_revocation(@jobs_queue, jobs)
+      end
+
+      # Enqueues the shutdown jobs for all the executors that exist in our subscription group
+      def build_and_schedule_shutdown_jobs
+        jobs = []
+
+        @executors.each do |_, _, executor|
+          jobs << @jobs_builder.shutdown(executor)
+        end
+
+        @scheduler.schedule_shutdown(@jobs_queue, jobs)
+      end
+
+      # Polls messages within the time and amount boundaries defined in the settings and then
+      # builds karafka messages based on the raw rdkafka messages buffer returned by the
+      # `#batch_poll` method.
+      #
+      # @note There are two buffers, one for raw messages and one for "built" karafka messages
+      def poll_and_remap_messages
+        @messages_buffer.remap(
+          @client.batch_poll
+        )
+      end
+
+      # Takes the messages per topic partition and enqueues processing jobs in threads using
+      # given scheduler.
+      def build_and_schedule_consumption_jobs
+        return if @messages_buffer.empty?
+
+        jobs = []
+
+        @messages_buffer.each do |topic, partition, messages|
+          coordinator = @coordinators.find_or_create(topic, partition)
+
+          # Start work coordination for this topic partition
+          coordinator.start(messages)
+
+          @partitioner.call(topic, messages) do |group_id, partition_messages|
+            # Count the job we're going to create here
+            coordinator.increment
+
+            executor = @executors.find_or_create(topic, partition, group_id)
+
+            jobs << @jobs_builder.consume(executor, partition_messages, coordinator)
+          end
+        end
+
+        @scheduler.schedule_consumption(@jobs_queue, jobs)
+      end
+
+      # Waits for all the jobs from a given subscription group to finish before moving forward
+      def wait
+        @jobs_queue.wait(@subscription_group.id)
+      end
+
+      # Waits without blocking the polling
+      # This should be used only when we no longer plan to use any incoming data and we can safely
+      #   discard it
+      def wait_with_poll
+        @client.batch_poll until @jobs_queue.empty?(@subscription_group.id)
       end
 
-      #
-      #
-
-
+      # We can stop client without a problem, as it will reinitialize itself when running the
+      # `#fetch_loop` again. We just need to remember to also reset the runner as it is a long
+      # running one, so with a new connection to Kafka, we need to initialize the state of the
+      # runner and underlying consumers once again.
+      def restart
+        # If there was any problem with processing, before we reset things we need to make sure,
+        # there are no jobs in the queue. Otherwise it could lead to leakage in between client
+        # resetting.
+        @jobs_queue.wait(@subscription_group.id)
+        @jobs_queue.clear(@subscription_group.id)
+        @client.reset
+        @coordinators.reset
+        @executors = Processing::ExecutorsBuffer.new(@client, @subscription_group)
      end
     end
   end
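In a real process the runner (data/lib/karafka/runner.rb above) builds and starts the listeners; the sketch below is only an illustration of the lifecycle implied by this diff. It assumes `Karafka::Processing::JobsQueue` takes no constructor arguments and that `Karafka::App.subscription_groups` returns the groups built from the routing.

# Illustrative lifecycle sketch only - not Karafka's actual boot code.
jobs_queue = Karafka::Processing::JobsQueue.new
subscription_group = Karafka::App.subscription_groups.first

listener = Karafka::Connection::Listener.new(subscription_group, jobs_queue)

# `#call` blocks inside the fetch loop: poll -> schedule jobs -> wait -> commit offsets,
# restarting itself after connection-level errors.
listener_thread = Thread.new { listener.call }

# `#shutdown` is mutex-protected, so a forceful stop may invoke it from another thread.
listener.shutdown
listener_thread.join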
data/lib/karafka/connection/listeners_batch.rb
@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Abstraction layer around listeners batch.
+    class ListenersBatch
+      include Enumerable
+
+      # @param jobs_queue [JobsQueue]
+      # @return [ListenersBatch]
+      def initialize(jobs_queue)
+        @batch = App.subscription_groups.map do |subscription_group|
+          Connection::Listener.new(subscription_group, jobs_queue)
+        end
+      end
+
+      # Iterates over available listeners and yields each listener
+      # @param block [Proc] block we want to run
+      def each(&block)
+        @batch.each(&block)
+      end
+    end
+  end
+end
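A rough usage sketch, assuming (as above) that `JobsQueue` takes no constructor arguments: one listener is created per subscription group and all of them share the single jobs queue.

# Illustrative only: the batch enumerates one listener per subscription group.
jobs_queue = Karafka::Processing::JobsQueue.new

listeners = Karafka::Connection::ListenersBatch.new(jobs_queue)

# Enumerable behaviour comes from `include Enumerable` plus `#each`
listeners.map(&:id) # => one uuid per subscription group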
data/lib/karafka/connection/messages_buffer.rb
@@ -0,0 +1,84 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Buffer used to build and store karafka messages built based on raw librdkafka messages.
+    #
+    # Why do we have two buffers? `RawMessagesBuffer` is used to store raw messages and to handle
+    # cases related to partition revocation and reconnections. It is "internal" to the listening
+    # process. `MessagesBuffer` on the other hand is used to "translate" those raw messages that
+    # we know that are ok into Karafka messages and to simplify further work with them.
+    #
+    # While it adds a bit of overhead, it makes conceptual things much easier and it adds only two
+    # simple hash iterations over messages batch.
+    #
+    # @note This buffer is NOT thread safe. We do not worry about it as we do not use it outside
+    #   of the main listener loop. It can be cleared after the jobs are scheduled with messages
+    #   it stores, because messages arrays are not "cleared" in any way directly and their
+    #   reference stays.
+    class MessagesBuffer
+      attr_reader :size
+
+      # @param subscription_group [Karafka::Routing::SubscriptionGroup]
+      def initialize(subscription_group)
+        @subscription_group = subscription_group
+        @size = 0
+        @groups = Hash.new do |topic_groups, topic|
+          topic_groups[topic] = Hash.new do |partition_groups, partition|
+            partition_groups[partition] = []
+          end
+        end
+      end
+
+      # Remaps raw messages from the raw messages buffer to Karafka messages
+      # @param raw_messages_buffer [RawMessagesBuffer] buffer with raw messages
+      def remap(raw_messages_buffer)
+        clear unless @size.zero?
+
+        # Since it happens "right after" we've received the messages, it is close enough it time
+        # to be used as the moment we received messages.
+        received_at = Time.now
+
+        raw_messages_buffer.each do |topic, partition, messages|
+          @size += messages.count
+
+          ktopic = @subscription_group.topics.find(topic)
+
+          @groups[topic][partition] = messages.map do |message|
+            Messages::Builders::Message.call(
+              message,
+              ktopic,
+              received_at
+            )
+          end
+        end
+      end
+
+      # Allows to iterate over all the topics and partitions messages
+      #
+      # @yieldparam [String] topic name
+      # @yieldparam [Integer] partition number
+      # @yieldparam [Array<Karafka::Messages::Message>] messages from a given topic partition
+      def each
+        @groups.each do |topic, partitions|
+          partitions.each do |partition, messages|
+            yield(topic, partition, messages)
+          end
+        end
+      end
+
+      # @return [Boolean] is the buffer empty or does it contain any messages
+      def empty?
+        @size.zero?
+      end
+
+      private
+
+      # Clears the buffer completely
+      def clear
+        @size = 0
+        @groups.clear
+      end
+    end
+  end
+end
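A hedged sketch of the two-buffer flow described above; `subscription_group` and `raw_buffer` (a `RawMessagesBuffer` filled by the client during `#batch_poll`) are assumed to be provided by the surrounding listener code.

# Illustrative flow: raw rdkafka messages -> Karafka messages, grouped per topic partition.
buffer = Karafka::Connection::MessagesBuffer.new(subscription_group)

buffer.remap(raw_buffer)

buffer.each do |topic, partition, messages|
  # `messages` are Karafka::Messages::Message objects built via Messages::Builders::Message
  puts "#{topic}/#{partition}: #{messages.count} message(s)"
end

buffer.empty? # => false whenever the last poll returned anything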
data/lib/karafka/connection/pauses_manager.rb
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Partitions pauses management abstraction layer.
+    # It aggregates all the pauses for all the partitions that we're working with.
+    class PausesManager
+      # @return [Karafka::Connection::PausesManager] pauses manager
+      def initialize
+        @pauses = Hash.new do |h, k|
+          h[k] = {}
+        end
+      end
+
+      # Creates or fetches pause tracker of a given topic partition.
+      #
+      # @param topic [String] topic name
+      # @param partition [Integer] partition number
+      # @return [Karafka::TimeTrackers::Pause] pause tracker instance
+      def fetch(topic, partition)
+        @pauses[topic][partition] ||= TimeTrackers::Pause.new(
+          timeout: Karafka::App.config.pause_timeout,
+          max_timeout: Karafka::App.config.pause_max_timeout,
+          exponential_backoff: Karafka::App.config.pause_with_exponential_backoff
+        )
+      end
+
+      # Resumes processing of partitions for which pause time has ended.
+      #
+      # @yieldparam [String] topic name
+      # @yieldparam [Integer] partition number
+      def resume
+        @pauses.each do |topic, partitions|
+          partitions.each do |partition, pause|
+            next unless pause.paused?
+            next unless pause.expired?
+
+            pause.resume
+
+            yield(topic, partition)
+          end
+        end
+      end
+    end
+  end
+end
data/lib/karafka/connection/raw_messages_buffer.rb
@@ -0,0 +1,101 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Buffer for raw librdkafka messages.
+    #
+    # When message is added to this buffer, it gets assigned to an array with other messages from
+    # the same topic and partition.
+    #
+    # @note This buffer is NOT threadsafe.
+    #
+    # @note We store data here in groups per topic partition to handle the revocation case, where
+    #   we may need to remove messages from a single topic partition.
+    class RawMessagesBuffer
+      attr_reader :size
+
+      # @return [Karafka::Connection::MessagesBuffer] buffer instance
+      def initialize
+        @size = 0
+        @groups = Hash.new do |topic_groups, topic|
+          topic_groups[topic] = Hash.new do |partition_groups, partition|
+            partition_groups[partition] = []
+          end
+        end
+      end
+
+      # Adds a message to the buffer.
+      #
+      # @param message [Rdkafka::Consumer::Message] raw rdkafka message
+      # @return [Array<Rdkafka::Consumer::Message>] given partition topic sub-buffer array
+      def <<(message)
+        @size += 1
+        @groups[message.topic][message.partition] << message
+      end
+
+      # Allows to iterate over all the topics and partitions messages
+      #
+      # @yieldparam [String] topic name
+      # @yieldparam [Integer] partition number
+      # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
+      def each
+        @groups.each do |topic, partitions|
+          partitions.each do |partition, messages|
+            yield(topic, partition, messages)
+          end
+        end
+      end
+
+      # Removes given topic and partition data out of the buffer
+      # This is used when there's a partition revocation
+      # @param topic [String] topic we're interested in
+      # @param partition [Integer] partition of which data we want to remove
+      def delete(topic, partition)
+        return unless @groups.key?(topic)
+        return unless @groups.fetch(topic).key?(partition)
+
+        topic_data = @groups.fetch(topic)
+        topic_data.delete(partition)
+
+        recount!
+
+        # If there are no more partitions to handle in a given topic, remove it completely
+        @groups.delete(topic) if topic_data.empty?
+      end
+
+      # Removes duplicated messages from the same partitions
+      # This should be used only when rebalance occurs, as we may get data again we already have
+      # due to the processing from the last offset. In cases like this, we may get same data
+      # again and we do want to ensure as few duplications as possible
+      def uniq!
+        @groups.each_value do |partitions|
+          partitions.each_value do |messages|
+            messages.uniq!(&:offset)
+          end
+        end
+
+        recount!
+      end
+
+      # Removes all the data from the buffer.
+      #
+      # @note We do not clear the whole groups hash but rather we clear the partition hashes, so
+      #   we save ourselves some objects allocations. We cannot clear the underlying arrays as they
+      #   may be used in other threads for data processing, thus if we would clear it, we could
+      #   potentially clear a raw messages array for a job that is in the jobs queue.
+      def clear
+        @size = 0
+        @groups.each_value(&:clear)
+      end
+
+      private
+
+      # Updates the messages count if we performed any operations that could change the state
+      def recount!
+        @size = @groups.each_value.sum do |partitions|
+          partitions.each_value.map(&:count).sum
+        end
+      end
+    end
+  end
+end
data/lib/karafka/connection/rebalance_manager.rb
@@ -0,0 +1,78 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Manager for tracking changes in the partitions assignment.
+    #
+    # We need tracking of those to clean up consumers that will no longer process given partitions
+    # as they were taken away.
+    #
+    # @note Since this does not happen really often, we try to stick with same objects for the
+    #   empty states most of the time, so we don't create many objects during the manager life
+    #
+    # @note Internally in the rebalance manager we have a notion of lost partitions. Partitions
+    #   that are lost, are those that got revoked but did not get re-assigned back. We do not
+    #   expose this concept outside and we normalize to have them revoked, as it is irrelevant
+    #   from the rest of the code perspective as only those that are lost are truly revoked.
+    class RebalanceManager
+      # Empty array for internal usage not to create new objects
+      EMPTY_ARRAY = [].freeze
+
+      attr_reader :assigned_partitions, :revoked_partitions
+
+      private_constant :EMPTY_ARRAY
+
+      # @return [RebalanceManager]
+      def initialize
+        @assigned_partitions = {}
+        @revoked_partitions = {}
+        @changed = false
+      end
+
+      # Resets the rebalance manager state
+      # This needs to be done before each polling loop as during the polling, the state may be
+      # changed
+      def clear
+        @assigned_partitions.clear
+        @revoked_partitions.clear
+        @changed = false
+      end
+
+      # @return [Boolean] indicates a state change in the partitions assignment
+      def changed?
+        @changed
+      end
+
+      # Callback that kicks in inside of rdkafka, when new partitions are assigned.
+      #
+      # @private
+      # @param _ [Rdkafka::Consumer]
+      # @param partitions [Rdkafka::Consumer::TopicPartitionList]
+      def on_partitions_assigned(_, partitions)
+        @assigned_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
+        @changed = true
+      end
+
+      # Callback that kicks in inside of rdkafka, when partitions are revoked.
+      #
+      # @private
+      # @param _ [Rdkafka::Consumer]
+      # @param partitions [Rdkafka::Consumer::TopicPartitionList]
+      def on_partitions_revoked(_, partitions)
+        @revoked_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
+        @changed = true
+      end
+
+      # We consider as lost only partitions that were taken away and not re-assigned back to us
+      def lost_partitions
+        lost_partitions = {}
+
+        revoked_partitions.each do |topic, partitions|
+          lost_partitions[topic] = partitions - assigned_partitions.fetch(topic, EMPTY_ARRAY)
+        end
+
+        lost_partitions
+      end
+    end
+  end
+end
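The revoked-vs-lost distinction can be illustrated without a real rdkafka consumer; below, the callbacks are fed stand-in objects that respond to `#to_h` the way the manager expects (an assumption mirroring `Rdkafka::Consumer::TopicPartitionList#to_h`, which maps topic names to lists of partition objects).

# Minimal stand-ins for the rdkafka structures (assumptions, for illustration only).
PartitionStub = Struct.new(:partition)

class TopicPartitionListStub
  def initialize(hash)
    @hash = hash
  end

  # Mirrors Rdkafka::Consumer::TopicPartitionList#to_h: topic name => partition objects
  def to_h
    @hash
  end
end

manager = Karafka::Connection::RebalanceManager.new

# Partitions 0 and 1 of "orders" get revoked during a rebalance...
manager.on_partitions_revoked(
  nil,
  TopicPartitionListStub.new('orders' => [PartitionStub.new(0), PartitionStub.new(1)])
)
# ...and partition 1 is assigned right back to this very process.
manager.on_partitions_assigned(
  nil,
  TopicPartitionListStub.new('orders' => [PartitionStub.new(1)])
)

manager.changed?        # => true
manager.lost_partitions # => { 'orders' => [0] } - revoked and not re-assigned back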
data/lib/karafka/contracts/base.rb
@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Contracts
+    # Base contract for all Karafka contracts
+    class Base < ::Karafka::Core::Contractable::Contract
+      # @param data [Hash] data for validation
+      # @return [Boolean] true if all good
+      # @raise [Errors::InvalidConfigurationError] invalid configuration error
+      # @note We use contracts only in the config validation context, so no need to add support
+      #   for multiple error classes. It will be added when it will be needed.
+      def validate!(data)
+        super(data, Errors::InvalidConfigurationError)
+      end
+    end
+  end
+end