karafka 1.4.13 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +3 -3
- data/.github/workflows/ci.yml +85 -30
- data/.ruby-version +1 -1
- data/CHANGELOG.md +268 -7
- data/CONTRIBUTING.md +10 -19
- data/Gemfile +6 -0
- data/Gemfile.lock +44 -87
- data/LICENSE +17 -0
- data/LICENSE-COMM +89 -0
- data/LICENSE-LGPL +165 -0
- data/README.md +44 -48
- data/bin/benchmarks +85 -0
- data/bin/create_token +22 -0
- data/bin/integrations +237 -0
- data/bin/karafka +4 -0
- data/bin/scenario +29 -0
- data/bin/stress_many +13 -0
- data/bin/stress_one +13 -0
- data/bin/wait_for_kafka +20 -0
- data/certs/karafka-pro.pem +11 -0
- data/config/errors.yml +55 -40
- data/docker-compose.yml +39 -3
- data/karafka.gemspec +11 -17
- data/lib/active_job/karafka.rb +21 -0
- data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
- data/lib/karafka/active_job/consumer.rb +26 -0
- data/lib/karafka/active_job/dispatcher.rb +38 -0
- data/lib/karafka/active_job/job_extensions.rb +34 -0
- data/lib/karafka/active_job/job_options_contract.rb +21 -0
- data/lib/karafka/active_job/routing/extensions.rb +31 -0
- data/lib/karafka/app.rb +15 -20
- data/lib/karafka/base_consumer.rb +181 -31
- data/lib/karafka/cli/base.rb +4 -4
- data/lib/karafka/cli/info.rb +43 -9
- data/lib/karafka/cli/install.rb +19 -10
- data/lib/karafka/cli/server.rb +17 -42
- data/lib/karafka/cli.rb +4 -11
- data/lib/karafka/connection/client.rb +385 -90
- data/lib/karafka/connection/listener.rb +246 -38
- data/lib/karafka/connection/listeners_batch.rb +24 -0
- data/lib/karafka/connection/messages_buffer.rb +84 -0
- data/lib/karafka/connection/pauses_manager.rb +46 -0
- data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
- data/lib/karafka/connection/rebalance_manager.rb +78 -0
- data/lib/karafka/contracts/base.rb +17 -0
- data/lib/karafka/contracts/config.rb +88 -11
- data/lib/karafka/contracts/consumer_group.rb +21 -189
- data/lib/karafka/contracts/consumer_group_topic.rb +34 -11
- data/lib/karafka/contracts/server_cli_options.rb +19 -18
- data/lib/karafka/contracts.rb +1 -1
- data/lib/karafka/env.rb +46 -0
- data/lib/karafka/errors.rb +21 -21
- data/lib/karafka/helpers/async.rb +33 -0
- data/lib/karafka/helpers/colorize.rb +20 -0
- data/lib/karafka/helpers/multi_delegator.rb +2 -2
- data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
- data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
- data/lib/karafka/instrumentation/logger_listener.rb +164 -0
- data/lib/karafka/instrumentation/monitor.rb +13 -61
- data/lib/karafka/instrumentation/notifications.rb +52 -0
- data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
- data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
- data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
- data/lib/karafka/instrumentation.rb +21 -0
- data/lib/karafka/licenser.rb +75 -0
- data/lib/karafka/messages/batch_metadata.rb +45 -0
- data/lib/karafka/messages/builders/batch_metadata.rb +40 -0
- data/lib/karafka/messages/builders/message.rb +39 -0
- data/lib/karafka/messages/builders/messages.rb +32 -0
- data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
- data/lib/karafka/messages/messages.rb +64 -0
- data/lib/karafka/{params → messages}/metadata.rb +4 -6
- data/lib/karafka/messages/seek.rb +9 -0
- data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
- data/lib/karafka/pro/active_job/consumer.rb +46 -0
- data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
- data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
- data/lib/karafka/pro/base_consumer.rb +82 -0
- data/lib/karafka/pro/contracts/base.rb +21 -0
- data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
- data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
- data/lib/karafka/pro/loader.rb +76 -0
- data/lib/karafka/pro/performance_tracker.rb +80 -0
- data/lib/karafka/pro/processing/coordinator.rb +72 -0
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
- data/lib/karafka/pro/processing/partitioner.rb +60 -0
- data/lib/karafka/pro/processing/scheduler.rb +56 -0
- data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
- data/lib/karafka/pro/routing/topic_extensions.rb +38 -0
- data/lib/karafka/pro.rb +13 -0
- data/lib/karafka/process.rb +1 -0
- data/lib/karafka/processing/coordinator.rb +88 -0
- data/lib/karafka/processing/coordinators_buffer.rb +54 -0
- data/lib/karafka/processing/executor.rb +118 -0
- data/lib/karafka/processing/executors_buffer.rb +88 -0
- data/lib/karafka/processing/jobs/base.rb +51 -0
- data/lib/karafka/processing/jobs/consume.rb +42 -0
- data/lib/karafka/processing/jobs/revoked.rb +22 -0
- data/lib/karafka/processing/jobs/shutdown.rb +23 -0
- data/lib/karafka/processing/jobs_builder.rb +29 -0
- data/lib/karafka/processing/jobs_queue.rb +144 -0
- data/lib/karafka/processing/partitioner.rb +22 -0
- data/lib/karafka/processing/result.rb +29 -0
- data/lib/karafka/processing/scheduler.rb +22 -0
- data/lib/karafka/processing/worker.rb +88 -0
- data/lib/karafka/processing/workers_batch.rb +27 -0
- data/lib/karafka/railtie.rb +113 -0
- data/lib/karafka/routing/builder.rb +15 -24
- data/lib/karafka/routing/consumer_group.rb +11 -19
- data/lib/karafka/routing/consumer_mapper.rb +1 -2
- data/lib/karafka/routing/router.rb +1 -1
- data/lib/karafka/routing/subscription_group.rb +53 -0
- data/lib/karafka/routing/subscription_groups_builder.rb +53 -0
- data/lib/karafka/routing/topic.rb +61 -24
- data/lib/karafka/routing/topics.rb +38 -0
- data/lib/karafka/runner.rb +51 -0
- data/lib/karafka/serialization/json/deserializer.rb +6 -15
- data/lib/karafka/server.rb +67 -26
- data/lib/karafka/setup/config.rb +147 -175
- data/lib/karafka/status.rb +14 -5
- data/lib/karafka/templates/example_consumer.rb.erb +16 -0
- data/lib/karafka/templates/karafka.rb.erb +15 -51
- data/lib/karafka/time_trackers/base.rb +19 -0
- data/lib/karafka/time_trackers/pause.rb +92 -0
- data/lib/karafka/time_trackers/poll.rb +65 -0
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +38 -17
- data.tar.gz.sig +0 -0
- metadata +118 -120
- metadata.gz.sig +0 -0
- data/MIT-LICENCE +0 -18
- data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
- data/lib/karafka/attributes_map.rb +0 -63
- data/lib/karafka/backends/inline.rb +0 -16
- data/lib/karafka/base_responder.rb +0 -226
- data/lib/karafka/cli/flow.rb +0 -48
- data/lib/karafka/cli/missingno.rb +0 -19
- data/lib/karafka/code_reloader.rb +0 -67
- data/lib/karafka/connection/api_adapter.rb +0 -158
- data/lib/karafka/connection/batch_delegator.rb +0 -55
- data/lib/karafka/connection/builder.rb +0 -23
- data/lib/karafka/connection/message_delegator.rb +0 -36
- data/lib/karafka/consumers/batch_metadata.rb +0 -10
- data/lib/karafka/consumers/callbacks.rb +0 -71
- data/lib/karafka/consumers/includer.rb +0 -64
- data/lib/karafka/consumers/responders.rb +0 -24
- data/lib/karafka/consumers/single_params.rb +0 -15
- data/lib/karafka/contracts/responder_usage.rb +0 -54
- data/lib/karafka/fetcher.rb +0 -42
- data/lib/karafka/helpers/class_matcher.rb +0 -88
- data/lib/karafka/helpers/config_retriever.rb +0 -46
- data/lib/karafka/helpers/inflector.rb +0 -26
- data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
- data/lib/karafka/params/batch_metadata.rb +0 -26
- data/lib/karafka/params/builders/batch_metadata.rb +0 -30
- data/lib/karafka/params/builders/params.rb +0 -38
- data/lib/karafka/params/builders/params_batch.rb +0 -25
- data/lib/karafka/params/params_batch.rb +0 -60
- data/lib/karafka/patches/ruby_kafka.rb +0 -47
- data/lib/karafka/persistence/client.rb +0 -29
- data/lib/karafka/persistence/consumers.rb +0 -45
- data/lib/karafka/persistence/topics.rb +0 -48
- data/lib/karafka/responders/builder.rb +0 -36
- data/lib/karafka/responders/topic.rb +0 -55
- data/lib/karafka/routing/topic_mapper.rb +0 -53
- data/lib/karafka/serialization/json/serializer.rb +0 -31
- data/lib/karafka/setup/configurators/water_drop.rb +0 -36
- data/lib/karafka/templates/application_responder.rb.erb +0 -11
|
@@ -2,69 +2,277 @@
|
|
|
2
2
|
|
|
3
3
|
module Karafka
|
|
4
4
|
module Connection
|
|
5
|
-
# A single listener that listens to incoming messages from a single
|
|
6
|
-
#
|
|
7
|
-
#
|
|
8
|
-
#
|
|
5
|
+
# A single listener that listens to incoming messages from a single subscription group.
|
|
6
|
+
# It polls the messages and then enqueues jobs. It also takes care of potential recovery from
|
|
7
|
+
# critical errors by restarting everything in a safe manner.
|
|
8
|
+
#
|
|
9
|
+
# This is the heart of the consumption process.
|
|
9
10
|
class Listener
|
|
10
|
-
|
|
11
|
-
|
|
11
|
+
include Helpers::Async
|
|
12
|
+
|
|
13
|
+
# Can be useful for logging
|
|
14
|
+
# @return [String] id of this listener
|
|
15
|
+
attr_reader :id
|
|
16
|
+
|
|
17
|
+
# @param subscription_group [Karafka::Routing::SubscriptionGroup]
|
|
18
|
+
# @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
|
|
12
19
|
# @return [Karafka::Connection::Listener] listener instance
|
|
13
|
-
def initialize(
|
|
14
|
-
|
|
20
|
+
def initialize(subscription_group, jobs_queue)
|
|
21
|
+
proc_config = ::Karafka::App.config.internal.processing
|
|
22
|
+
|
|
23
|
+
@id = SecureRandom.uuid
|
|
24
|
+
@subscription_group = subscription_group
|
|
25
|
+
@jobs_queue = jobs_queue
|
|
26
|
+
@coordinators = Processing::CoordinatorsBuffer.new
|
|
27
|
+
@client = Client.new(@subscription_group)
|
|
28
|
+
@executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
|
|
29
|
+
@jobs_builder = proc_config.jobs_builder
|
|
30
|
+
@partitioner = proc_config.partitioner_class.new(subscription_group)
|
|
31
|
+
# We reference scheduler here as it is much faster than fetching this each time
|
|
32
|
+
@scheduler = proc_config.scheduler
|
|
33
|
+
# We keep one buffer for messages to preserve memory and not allocate extra objects
|
|
34
|
+
# We can do this that way because we always first schedule jobs using messages before we
|
|
35
|
+
# fetch another batch.
|
|
36
|
+
@messages_buffer = MessagesBuffer.new(subscription_group)
|
|
37
|
+
@mutex = Mutex.new
|
|
38
|
+
@stopped = false
|
|
15
39
|
end
|
|
16
40
|
|
|
17
|
-
# Runs
|
|
41
|
+
# Runs the main listener fetch loop.
|
|
42
|
+
#
|
|
43
|
+
# @note Prefetch callbacks can be used to seek offset or do other things before we actually
|
|
44
|
+
# start consuming data
|
|
18
45
|
def call
|
|
19
46
|
Karafka.monitor.instrument(
|
|
20
47
|
'connection.listener.before_fetch_loop',
|
|
21
|
-
|
|
22
|
-
|
|
48
|
+
caller: self,
|
|
49
|
+
subscription_group: @subscription_group,
|
|
50
|
+
client: @client
|
|
23
51
|
)
|
|
52
|
+
|
|
24
53
|
fetch_loop
|
|
25
54
|
end
|
|
26
55
|
|
|
56
|
+
# Stops the jobs queue, triggers shutdown on all the executors (sync), commits offsets and
|
|
57
|
+
# stops kafka client.
|
|
58
|
+
#
|
|
59
|
+
# @note This method is not private despite being part of the fetch loop because in case of
|
|
60
|
+
# a forceful shutdown, it may be invoked from a separate thread
|
|
61
|
+
#
|
|
62
|
+
# @note We wrap it with a mutex exactly because of the above case of forceful shutdown
|
|
63
|
+
def shutdown
|
|
64
|
+
return if @stopped
|
|
65
|
+
|
|
66
|
+
@mutex.synchronize do
|
|
67
|
+
@stopped = true
|
|
68
|
+
@executors.clear
|
|
69
|
+
@coordinators.reset
|
|
70
|
+
@client.commit_offsets!
|
|
71
|
+
@client.stop
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
|
|
27
75
|
private
|
|
28
76
|
|
|
29
|
-
#
|
|
77
|
+
# Fetches the data and adds it to the jobs queue.
|
|
78
|
+
#
|
|
30
79
|
# @note We catch all the errors here, so they don't affect other listeners (or this one)
|
|
31
80
|
# so we will be able to listen and consume other incoming messages.
|
|
32
|
-
# Since it is run inside Karafka::Connection::
|
|
33
|
-
# won't crash the whole
|
|
81
|
+
# Since it is run inside Karafka::Connection::Runner thread - catching all the exceptions
|
|
82
|
+
# won't crash the whole process. Here we mostly focus on catching the exceptions related to
|
|
34
83
|
# Kafka connections / Internet connection issues / Etc. Business logic problems should not
|
|
35
|
-
# propagate this far
|
|
84
|
+
# propagate this far.
|
|
36
85
|
def fetch_loop
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
86
|
+
until Karafka::App.stopping?
|
|
87
|
+
Karafka.monitor.instrument(
|
|
88
|
+
'connection.listener.fetch_loop',
|
|
89
|
+
caller: self,
|
|
90
|
+
client: @client
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
resume_paused_partitions
|
|
94
|
+
|
|
95
|
+
Karafka.monitor.instrument(
|
|
96
|
+
'connection.listener.fetch_loop.received',
|
|
97
|
+
caller: self,
|
|
98
|
+
messages_buffer: @messages_buffer
|
|
99
|
+
) do
|
|
100
|
+
# We need to fetch data before we revoke lost partitions details as during the polling
|
|
101
|
+
# the callbacks for tracking lost partitions are triggered. Otherwise we would be
|
|
102
|
+
# always one batch behind.
|
|
103
|
+
poll_and_remap_messages
|
|
47
104
|
end
|
|
105
|
+
|
|
106
|
+
# If there were revoked partitions, we need to wait on their jobs to finish before
|
|
107
|
+
# distributing consuming jobs as upon revoking, we might get assigned to the same
|
|
108
|
+
# partitions, thus getting their jobs. The revoking jobs need to finish before
|
|
109
|
+
# appropriate consumers are taken down and re-created
|
|
110
|
+
build_and_schedule_revoke_lost_partitions_jobs
|
|
111
|
+
|
|
112
|
+
# We wait only on jobs from our subscription group. Other groups are independent.
|
|
113
|
+
# This will block on revoked jobs until they are finished. Those are not meant to last
|
|
114
|
+
# long and should not have any bigger impact on the system. Doing this in a blocking way
|
|
115
|
+
# simplifies the overall design and prevents race conditions
|
|
116
|
+
wait
|
|
117
|
+
|
|
118
|
+
build_and_schedule_consumption_jobs
|
|
119
|
+
|
|
120
|
+
wait
|
|
121
|
+
|
|
122
|
+
# We don't use the `#commit_offsets!` here for performance reasons. This can be achieved
|
|
123
|
+
# if needed by using manual offset management.
|
|
124
|
+
@client.commit_offsets
|
|
48
125
|
end
|
|
126
|
+
|
|
127
|
+
# If we are stopping we will no longer schedule any jobs despite polling.
|
|
128
|
+
# We need to keep polling not to exceed the `max.poll.interval` for long-running
|
|
129
|
+
# non-blocking jobs and we need to allow them to finish. We however do not want to
|
|
130
|
+
# enqueue any new jobs. It's worth keeping in mind that it is the end user's responsibility
|
|
131
|
+
# to detect shutdown in their long-running logic or else Karafka will force shutdown
|
|
132
|
+
# after a while.
|
|
133
|
+
#
|
|
134
|
+
# We do not care about resuming any partitions or lost jobs as we do not plan to do
|
|
135
|
+
# anything with them as we're in the shutdown phase.
|
|
136
|
+
wait_with_poll
|
|
137
|
+
|
|
138
|
+
# We do not want to schedule the shutdown jobs prior to finishing all the jobs
|
|
139
|
+
# (including non-blocking) as there might be a long-running job with a shutdown and then
|
|
140
|
+
# we would run two jobs in parallel for the same executor and consumer. We do not want that
|
|
141
|
+
# as it could create a race-condition.
|
|
142
|
+
build_and_schedule_shutdown_jobs
|
|
143
|
+
|
|
144
|
+
wait_with_poll
|
|
145
|
+
|
|
146
|
+
shutdown
|
|
147
|
+
|
|
49
148
|
# This is on purpose - see the notes for this method
|
|
50
149
|
# rubocop:disable Lint/RescueException
|
|
51
150
|
rescue Exception => e
|
|
52
|
-
Karafka.monitor.instrument('connection.listener.fetch_loop.error', caller: self, error: e)
|
|
53
151
|
# rubocop:enable Lint/RescueException
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
152
|
+
Karafka.monitor.instrument(
|
|
153
|
+
'error.occurred',
|
|
154
|
+
caller: self,
|
|
155
|
+
error: e,
|
|
156
|
+
type: 'connection.listener.fetch_loop.error'
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
restart
|
|
160
|
+
|
|
161
|
+
sleep(1) && retry
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
# Resumes processing of partitions that were paused due to an error.
|
|
165
|
+
def resume_paused_partitions
|
|
166
|
+
@coordinators.resume do |topic, partition|
|
|
167
|
+
@client.resume(topic, partition)
|
|
168
|
+
end
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
# Enqueues revoking jobs for partitions that were taken away from the running process.
|
|
172
|
+
def build_and_schedule_revoke_lost_partitions_jobs
|
|
173
|
+
revoked_partitions = @client.rebalance_manager.revoked_partitions
|
|
174
|
+
|
|
175
|
+
# Stop early to save on some execution and array allocation
|
|
176
|
+
return if revoked_partitions.empty?
|
|
177
|
+
|
|
178
|
+
jobs = []
|
|
179
|
+
|
|
180
|
+
revoked_partitions.each do |topic, partitions|
|
|
181
|
+
partitions.each do |partition|
|
|
182
|
+
@coordinators.revoke(topic, partition)
|
|
183
|
+
|
|
184
|
+
# There may be a case where we have lost partition of which data we have never
|
|
185
|
+
# processed (if it was assigned and revoked really fast), thus we may not have it
|
|
186
|
+
# here. In cases like this, we do not run a revocation job
|
|
187
|
+
@executors.find_all(topic, partition).each do |executor|
|
|
188
|
+
jobs << @jobs_builder.revoked(executor)
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
# We need to remove all the executors of a given topic partition that we have lost, so
|
|
192
|
+
# next time we pick up its work, new executors kick in. This may be needed especially
|
|
193
|
+
# for LRJ where we could end up with a race condition
|
|
194
|
+
# This revocation needs to happen after the jobs are scheduled, otherwise they would
|
|
195
|
+
# be scheduled with new executors instead of old
|
|
196
|
+
@executors.revoke(topic, partition)
|
|
197
|
+
end
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
@scheduler.schedule_revocation(@jobs_queue, jobs)
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
# Enqueues the shutdown jobs for all the executors that exist in our subscription group
|
|
204
|
+
def build_and_schedule_shutdown_jobs
|
|
205
|
+
jobs = []
|
|
206
|
+
|
|
207
|
+
@executors.each do |_, _, executor|
|
|
208
|
+
jobs << @jobs_builder.shutdown(executor)
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
@scheduler.schedule_shutdown(@jobs_queue, jobs)
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
# Polls messages within the time and amount boundaries defined in the settings and then
|
|
215
|
+
# builds karafka messages based on the raw rdkafka messages buffer returned by the
|
|
216
|
+
# `#batch_poll` method.
|
|
217
|
+
#
|
|
218
|
+
# @note There are two buffers, one for raw messages and one for "built" karafka messages
|
|
219
|
+
def poll_and_remap_messages
|
|
220
|
+
@messages_buffer.remap(
|
|
221
|
+
@client.batch_poll
|
|
222
|
+
)
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
# Takes the messages per topic partition and enqueues processing jobs in threads using
|
|
226
|
+
# given scheduler.
|
|
227
|
+
def build_and_schedule_consumption_jobs
|
|
228
|
+
return if @messages_buffer.empty?
|
|
229
|
+
|
|
230
|
+
jobs = []
|
|
231
|
+
|
|
232
|
+
@messages_buffer.each do |topic, partition, messages|
|
|
233
|
+
coordinator = @coordinators.find_or_create(topic, partition)
|
|
234
|
+
|
|
235
|
+
# Start work coordination for this topic partition
|
|
236
|
+
coordinator.start(messages)
|
|
237
|
+
|
|
238
|
+
@partitioner.call(topic, messages) do |group_id, partition_messages|
|
|
239
|
+
# Count the job we're going to create here
|
|
240
|
+
coordinator.increment
|
|
241
|
+
|
|
242
|
+
executor = @executors.find_or_create(topic, partition, group_id)
|
|
243
|
+
|
|
244
|
+
jobs << @jobs_builder.consume(executor, partition_messages, coordinator)
|
|
245
|
+
end
|
|
246
|
+
end
|
|
247
|
+
|
|
248
|
+
@scheduler.schedule_consumption(@jobs_queue, jobs)
|
|
249
|
+
end
|
|
250
|
+
|
|
251
|
+
# Waits for all the jobs from a given subscription group to finish before moving forward
|
|
252
|
+
def wait
|
|
253
|
+
@jobs_queue.wait(@subscription_group.id)
|
|
254
|
+
end
|
|
255
|
+
|
|
256
|
+
# Waits without blocking the polling
|
|
257
|
+
# This should be used only when we no longer plan to use any incoming data and we can safely
|
|
258
|
+
# discard it
|
|
259
|
+
def wait_with_poll
|
|
260
|
+
@client.batch_poll until @jobs_queue.empty?(@subscription_group.id)
|
|
62
261
|
end
|
|
63
262
|
|
|
64
|
-
#
|
|
65
|
-
#
|
|
66
|
-
|
|
67
|
-
|
|
263
|
+
# We can stop client without a problem, as it will reinitialize itself when running the
|
|
264
|
+
# `#fetch_loop` again. We just need to remember to also reset the runner as it is a long
|
|
265
|
+
# running one, so with a new connection to Kafka, we need to initialize the state of the
|
|
266
|
+
# runner and underlying consumers once again.
|
|
267
|
+
def restart
|
|
268
|
+
# If there was any problem with processing, before we reset things we need to make sure,
|
|
269
|
+
# there are no jobs in the queue. Otherwise it could lead to leakage in between client
|
|
270
|
+
# resetting.
|
|
271
|
+
@jobs_queue.wait(@subscription_group.id)
|
|
272
|
+
@jobs_queue.clear(@subscription_group.id)
|
|
273
|
+
@client.reset
|
|
274
|
+
@coordinators.reset
|
|
275
|
+
@executors = Processing::ExecutorsBuffer.new(@client, @subscription_group)
|
|
68
276
|
end
|
|
69
277
|
end
|
|
70
278
|
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Connection
|
|
5
|
+
# Abstraction layer around listeners batch.
|
|
6
|
+
class ListenersBatch
|
|
7
|
+
include Enumerable
|
|
8
|
+
|
|
9
|
+
# @param jobs_queue [JobsQueue]
|
|
10
|
+
# @return [ListenersBatch]
|
|
11
|
+
def initialize(jobs_queue)
|
|
12
|
+
@batch = App.subscription_groups.map do |subscription_group|
|
|
13
|
+
Connection::Listener.new(subscription_group, jobs_queue)
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
# Iterates over available listeners and yields each listener
|
|
18
|
+
# @param block [Proc] block we want to run
|
|
19
|
+
def each(&block)
|
|
20
|
+
@batch.each(&block)
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Connection
|
|
5
|
+
# Buffer used to build and store karafka messages built based on raw librdkafka messages.
|
|
6
|
+
#
|
|
7
|
+
# Why do we have two buffers? `RawMessagesBuffer` is used to store raw messages and to handle
|
|
8
|
+
# cases related to partition revocation and reconnections. It is "internal" to the listening
|
|
9
|
+
# process. `MessagesBuffer` on the other hand is used to "translate" those raw messages that
|
|
10
|
+
# we know are ok into Karafka messages and to simplify further work with them.
|
|
11
|
+
#
|
|
12
|
+
# While it adds a bit of overhead, it makes conceptual things much easier and it adds only two
|
|
13
|
+
# simple hash iterations over messages batch.
|
|
14
|
+
#
|
|
15
|
+
# @note This buffer is NOT thread safe. We do not worry about it as we do not use it outside
|
|
16
|
+
# of the main listener loop. It can be cleared after the jobs are scheduled with messages
|
|
17
|
+
# it stores, because messages arrays are not "cleared" in any way directly and their
|
|
18
|
+
# reference stays.
|
|
19
|
+
class MessagesBuffer
|
|
20
|
+
attr_reader :size
|
|
21
|
+
|
|
22
|
+
# @param subscription_group [Karafka::Routing::SubscriptionGroup]
|
|
23
|
+
def initialize(subscription_group)
|
|
24
|
+
@subscription_group = subscription_group
|
|
25
|
+
@size = 0
|
|
26
|
+
@groups = Hash.new do |topic_groups, topic|
|
|
27
|
+
topic_groups[topic] = Hash.new do |partition_groups, partition|
|
|
28
|
+
partition_groups[partition] = []
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Remaps raw messages from the raw messages buffer to Karafka messages
|
|
34
|
+
# @param raw_messages_buffer [RawMessagesBuffer] buffer with raw messages
|
|
35
|
+
def remap(raw_messages_buffer)
|
|
36
|
+
clear unless @size.zero?
|
|
37
|
+
|
|
38
|
+
# Since it happens "right after" we've received the messages, it is close enough in time
|
|
39
|
+
# to be used as the moment we received messages.
|
|
40
|
+
received_at = Time.now
|
|
41
|
+
|
|
42
|
+
raw_messages_buffer.each do |topic, partition, messages|
|
|
43
|
+
@size += messages.count
|
|
44
|
+
|
|
45
|
+
ktopic = @subscription_group.topics.find(topic)
|
|
46
|
+
|
|
47
|
+
@groups[topic][partition] = messages.map do |message|
|
|
48
|
+
Messages::Builders::Message.call(
|
|
49
|
+
message,
|
|
50
|
+
ktopic,
|
|
51
|
+
received_at
|
|
52
|
+
)
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
# Allows to iterate over all the topics and partitions messages
|
|
58
|
+
#
|
|
59
|
+
# @yieldparam [String] topic name
|
|
60
|
+
# @yieldparam [Integer] partition number
|
|
61
|
+
# @yieldparam [Array<Karafka::Messages::Message>] messages from a given topic partition
|
|
62
|
+
def each
|
|
63
|
+
@groups.each do |topic, partitions|
|
|
64
|
+
partitions.each do |partition, messages|
|
|
65
|
+
yield(topic, partition, messages)
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# @return [Boolean] is the buffer empty or does it contain any messages
|
|
71
|
+
def empty?
|
|
72
|
+
@size.zero?
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
private
|
|
76
|
+
|
|
77
|
+
# Clears the buffer completely
|
|
78
|
+
def clear
|
|
79
|
+
@size = 0
|
|
80
|
+
@groups.clear
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Connection
|
|
5
|
+
# Partitions pauses management abstraction layer.
|
|
6
|
+
# It aggregates all the pauses for all the partitions that we're working with.
|
|
7
|
+
class PausesManager
|
|
8
|
+
# @return [Karafka::Connection::PausesManager] pauses manager
|
|
9
|
+
def initialize
|
|
10
|
+
@pauses = Hash.new do |h, k|
|
|
11
|
+
h[k] = {}
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
# Creates or fetches pause tracker of a given topic partition.
|
|
16
|
+
#
|
|
17
|
+
# @param topic [String] topic name
|
|
18
|
+
# @param partition [Integer] partition number
|
|
19
|
+
# @return [Karafka::TimeTrackers::Pause] pause tracker instance
|
|
20
|
+
def fetch(topic, partition)
|
|
21
|
+
@pauses[topic][partition] ||= TimeTrackers::Pause.new(
|
|
22
|
+
timeout: Karafka::App.config.pause_timeout,
|
|
23
|
+
max_timeout: Karafka::App.config.pause_max_timeout,
|
|
24
|
+
exponential_backoff: Karafka::App.config.pause_with_exponential_backoff
|
|
25
|
+
)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# Resumes processing of partitions for which pause time has ended.
|
|
29
|
+
#
|
|
30
|
+
# @yieldparam [String] topic name
|
|
31
|
+
# @yieldparam [Integer] partition number
|
|
32
|
+
def resume
|
|
33
|
+
@pauses.each do |topic, partitions|
|
|
34
|
+
partitions.each do |partition, pause|
|
|
35
|
+
next unless pause.paused?
|
|
36
|
+
next unless pause.expired?
|
|
37
|
+
|
|
38
|
+
pause.resume
|
|
39
|
+
|
|
40
|
+
yield(topic, partition)
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Connection
|
|
5
|
+
# Buffer for raw librdkafka messages.
|
|
6
|
+
#
|
|
7
|
+
# When message is added to this buffer, it gets assigned to an array with other messages from
|
|
8
|
+
# the same topic and partition.
|
|
9
|
+
#
|
|
10
|
+
# @note This buffer is NOT threadsafe.
|
|
11
|
+
#
|
|
12
|
+
# @note We store data here in groups per topic partition to handle the revocation case, where
|
|
13
|
+
# we may need to remove messages from a single topic partition.
|
|
14
|
+
class RawMessagesBuffer
|
|
15
|
+
attr_reader :size
|
|
16
|
+
|
|
17
|
+
# @return [Karafka::Connection::RawMessagesBuffer] buffer instance
|
|
18
|
+
def initialize
|
|
19
|
+
@size = 0
|
|
20
|
+
@groups = Hash.new do |topic_groups, topic|
|
|
21
|
+
topic_groups[topic] = Hash.new do |partition_groups, partition|
|
|
22
|
+
partition_groups[partition] = []
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
# Adds a message to the buffer.
|
|
28
|
+
#
|
|
29
|
+
# @param message [Rdkafka::Consumer::Message] raw rdkafka message
|
|
30
|
+
# @return [Array<Rdkafka::Consumer::Message>] given partition topic sub-buffer array
|
|
31
|
+
def <<(message)
|
|
32
|
+
@size += 1
|
|
33
|
+
@groups[message.topic][message.partition] << message
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# Allows to iterate over all the topics and partitions messages
|
|
37
|
+
#
|
|
38
|
+
# @yieldparam [String] topic name
|
|
39
|
+
# @yieldparam [Integer] partition number
|
|
40
|
+
# @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
|
|
41
|
+
def each
|
|
42
|
+
@groups.each do |topic, partitions|
|
|
43
|
+
partitions.each do |partition, messages|
|
|
44
|
+
yield(topic, partition, messages)
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# Removes given topic and partition data out of the buffer
|
|
50
|
+
# This is used when there's a partition revocation
|
|
51
|
+
# @param topic [String] topic we're interested in
|
|
52
|
+
# @param partition [Integer] partition of which data we want to remove
|
|
53
|
+
def delete(topic, partition)
|
|
54
|
+
return unless @groups.key?(topic)
|
|
55
|
+
return unless @groups.fetch(topic).key?(partition)
|
|
56
|
+
|
|
57
|
+
topic_data = @groups.fetch(topic)
|
|
58
|
+
topic_data.delete(partition)
|
|
59
|
+
|
|
60
|
+
recount!
|
|
61
|
+
|
|
62
|
+
# If there are no more partitions to handle in a given topic, remove it completely
|
|
63
|
+
@groups.delete(topic) if topic_data.empty?
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
# Removes duplicated messages from the same partitions
|
|
67
|
+
# This should be used only when rebalance occurs, as we may again get data we already have
|
|
68
|
+
# due to the processing from the last offset. In cases like this, we may get same data
|
|
69
|
+
# again and we do want to ensure as few duplications as possible
|
|
70
|
+
def uniq!
|
|
71
|
+
@groups.each_value do |partitions|
|
|
72
|
+
partitions.each_value do |messages|
|
|
73
|
+
messages.uniq!(&:offset)
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
recount!
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# Removes all the data from the buffer.
|
|
81
|
+
#
|
|
82
|
+
# @note We do not clear the whole groups hash but rather we clear the partition hashes, so
|
|
83
|
+
# we save ourselves some objects allocations. We cannot clear the underlying arrays as they
|
|
84
|
+
# may be used in other threads for data processing, thus if we would clear it, we could
|
|
85
|
+
# potentially clear a raw messages array for a job that is in the jobs queue.
|
|
86
|
+
def clear
|
|
87
|
+
@size = 0
|
|
88
|
+
@groups.each_value(&:clear)
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
private
|
|
92
|
+
|
|
93
|
+
# Updates the messages count if we performed any operations that could change the state
|
|
94
|
+
def recount!
|
|
95
|
+
@size = @groups.each_value.sum do |partitions|
|
|
96
|
+
partitions.each_value.map(&:count).sum
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
end
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Connection
|
|
5
|
+
# Manager for tracking changes in the partitions assignment.
|
|
6
|
+
#
|
|
7
|
+
# We need tracking of those to clean up consumers that will no longer process given partitions
|
|
8
|
+
# as they were taken away.
|
|
9
|
+
#
|
|
10
|
+
# @note Since this does not happen really often, we try to stick with same objects for the
|
|
11
|
+
# empty states most of the time, so we don't create many objects during the manager life
|
|
12
|
+
#
|
|
13
|
+
# @note Internally in the rebalance manager we have a notion of lost partitions. Partitions
|
|
14
|
+
# that are lost, are those that got revoked but did not get re-assigned back. We do not
|
|
15
|
+
# expose this concept outside and we normalize to have them revoked, as it is irrelevant
|
|
16
|
+
# from the rest of the code perspective as only those that are lost are truly revoked.
|
|
17
|
+
class RebalanceManager
|
|
18
|
+
# Empty array for internal usage not to create new objects
|
|
19
|
+
EMPTY_ARRAY = [].freeze
|
|
20
|
+
|
|
21
|
+
attr_reader :assigned_partitions, :revoked_partitions
|
|
22
|
+
|
|
23
|
+
private_constant :EMPTY_ARRAY
|
|
24
|
+
|
|
25
|
+
# @return [RebalanceManager]
|
|
26
|
+
def initialize
|
|
27
|
+
@assigned_partitions = {}
|
|
28
|
+
@revoked_partitions = {}
|
|
29
|
+
@changed = false
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
# Resets the rebalance manager state
|
|
33
|
+
# This needs to be done before each polling loop as during the polling, the state may be
|
|
34
|
+
# changed
|
|
35
|
+
def clear
|
|
36
|
+
@assigned_partitions.clear
|
|
37
|
+
@revoked_partitions.clear
|
|
38
|
+
@changed = false
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
# @return [Boolean] indicates a state change in the partitions assignment
|
|
42
|
+
def changed?
|
|
43
|
+
@changed
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# Callback that kicks in inside of rdkafka, when new partitions are assigned.
|
|
47
|
+
#
|
|
48
|
+
# @private
|
|
49
|
+
# @param _ [Rdkafka::Consumer]
|
|
50
|
+
# @param partitions [Rdkafka::Consumer::TopicPartitionList]
|
|
51
|
+
def on_partitions_assigned(_, partitions)
|
|
52
|
+
@assigned_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
|
|
53
|
+
@changed = true
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# Callback that kicks in inside of rdkafka, when partitions are revoked.
|
|
57
|
+
#
|
|
58
|
+
# @private
|
|
59
|
+
# @param _ [Rdkafka::Consumer]
|
|
60
|
+
# @param partitions [Rdkafka::Consumer::TopicPartitionList]
|
|
61
|
+
def on_partitions_revoked(_, partitions)
|
|
62
|
+
@revoked_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
|
|
63
|
+
@changed = true
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
# We consider as lost only partitions that were taken away and not re-assigned back to us
|
|
67
|
+
def lost_partitions
|
|
68
|
+
lost_partitions = {}
|
|
69
|
+
|
|
70
|
+
revoked_partitions.each do |topic, partitions|
|
|
71
|
+
lost_partitions[topic] = partitions - assigned_partitions.fetch(topic, EMPTY_ARRAY)
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
lost_partitions
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Contracts
|
|
5
|
+
# Base contract for all Karafka contracts
|
|
6
|
+
class Base < ::Karafka::Core::Contractable::Contract
|
|
7
|
+
# @param data [Hash] data for validation
|
|
8
|
+
# @return [Boolean] true if all good
|
|
9
|
+
# @raise [Errors::InvalidConfigurationError] invalid configuration error
|
|
10
|
+
# @note We use contracts only in the config validation context, so no need to add support
|
|
11
|
+
# for multiple error classes. It will be added when it will be needed.
|
|
12
|
+
def validate!(data)
|
|
13
|
+
super(data, Errors::InvalidConfigurationError)
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|