karafka 1.4.13 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +3 -3
  3. data/.github/workflows/ci.yml +85 -30
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +268 -7
  6. data/CONTRIBUTING.md +10 -19
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +44 -87
  9. data/LICENSE +17 -0
  10. data/LICENSE-COMM +89 -0
  11. data/LICENSE-LGPL +165 -0
  12. data/README.md +44 -48
  13. data/bin/benchmarks +85 -0
  14. data/bin/create_token +22 -0
  15. data/bin/integrations +237 -0
  16. data/bin/karafka +4 -0
  17. data/bin/scenario +29 -0
  18. data/bin/stress_many +13 -0
  19. data/bin/stress_one +13 -0
  20. data/bin/wait_for_kafka +20 -0
  21. data/certs/karafka-pro.pem +11 -0
  22. data/config/errors.yml +55 -40
  23. data/docker-compose.yml +39 -3
  24. data/karafka.gemspec +11 -17
  25. data/lib/active_job/karafka.rb +21 -0
  26. data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
  27. data/lib/karafka/active_job/consumer.rb +26 -0
  28. data/lib/karafka/active_job/dispatcher.rb +38 -0
  29. data/lib/karafka/active_job/job_extensions.rb +34 -0
  30. data/lib/karafka/active_job/job_options_contract.rb +21 -0
  31. data/lib/karafka/active_job/routing/extensions.rb +31 -0
  32. data/lib/karafka/app.rb +15 -20
  33. data/lib/karafka/base_consumer.rb +181 -31
  34. data/lib/karafka/cli/base.rb +4 -4
  35. data/lib/karafka/cli/info.rb +43 -9
  36. data/lib/karafka/cli/install.rb +19 -10
  37. data/lib/karafka/cli/server.rb +17 -42
  38. data/lib/karafka/cli.rb +4 -11
  39. data/lib/karafka/connection/client.rb +385 -90
  40. data/lib/karafka/connection/listener.rb +246 -38
  41. data/lib/karafka/connection/listeners_batch.rb +24 -0
  42. data/lib/karafka/connection/messages_buffer.rb +84 -0
  43. data/lib/karafka/connection/pauses_manager.rb +46 -0
  44. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  45. data/lib/karafka/connection/rebalance_manager.rb +78 -0
  46. data/lib/karafka/contracts/base.rb +17 -0
  47. data/lib/karafka/contracts/config.rb +88 -11
  48. data/lib/karafka/contracts/consumer_group.rb +21 -189
  49. data/lib/karafka/contracts/consumer_group_topic.rb +34 -11
  50. data/lib/karafka/contracts/server_cli_options.rb +19 -18
  51. data/lib/karafka/contracts.rb +1 -1
  52. data/lib/karafka/env.rb +46 -0
  53. data/lib/karafka/errors.rb +21 -21
  54. data/lib/karafka/helpers/async.rb +33 -0
  55. data/lib/karafka/helpers/colorize.rb +20 -0
  56. data/lib/karafka/helpers/multi_delegator.rb +2 -2
  57. data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
  58. data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
  59. data/lib/karafka/instrumentation/logger_listener.rb +164 -0
  60. data/lib/karafka/instrumentation/monitor.rb +13 -61
  61. data/lib/karafka/instrumentation/notifications.rb +52 -0
  62. data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
  63. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  64. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  65. data/lib/karafka/instrumentation.rb +21 -0
  66. data/lib/karafka/licenser.rb +75 -0
  67. data/lib/karafka/messages/batch_metadata.rb +45 -0
  68. data/lib/karafka/messages/builders/batch_metadata.rb +40 -0
  69. data/lib/karafka/messages/builders/message.rb +39 -0
  70. data/lib/karafka/messages/builders/messages.rb +32 -0
  71. data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
  72. data/lib/karafka/messages/messages.rb +64 -0
  73. data/lib/karafka/{params → messages}/metadata.rb +4 -6
  74. data/lib/karafka/messages/seek.rb +9 -0
  75. data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
  76. data/lib/karafka/pro/active_job/consumer.rb +46 -0
  77. data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
  78. data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
  79. data/lib/karafka/pro/base_consumer.rb +82 -0
  80. data/lib/karafka/pro/contracts/base.rb +21 -0
  81. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  82. data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
  83. data/lib/karafka/pro/loader.rb +76 -0
  84. data/lib/karafka/pro/performance_tracker.rb +80 -0
  85. data/lib/karafka/pro/processing/coordinator.rb +72 -0
  86. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
  87. data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
  88. data/lib/karafka/pro/processing/partitioner.rb +60 -0
  89. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  90. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  91. data/lib/karafka/pro/routing/topic_extensions.rb +38 -0
  92. data/lib/karafka/pro.rb +13 -0
  93. data/lib/karafka/process.rb +1 -0
  94. data/lib/karafka/processing/coordinator.rb +88 -0
  95. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  96. data/lib/karafka/processing/executor.rb +118 -0
  97. data/lib/karafka/processing/executors_buffer.rb +88 -0
  98. data/lib/karafka/processing/jobs/base.rb +51 -0
  99. data/lib/karafka/processing/jobs/consume.rb +42 -0
  100. data/lib/karafka/processing/jobs/revoked.rb +22 -0
  101. data/lib/karafka/processing/jobs/shutdown.rb +23 -0
  102. data/lib/karafka/processing/jobs_builder.rb +29 -0
  103. data/lib/karafka/processing/jobs_queue.rb +144 -0
  104. data/lib/karafka/processing/partitioner.rb +22 -0
  105. data/lib/karafka/processing/result.rb +29 -0
  106. data/lib/karafka/processing/scheduler.rb +22 -0
  107. data/lib/karafka/processing/worker.rb +88 -0
  108. data/lib/karafka/processing/workers_batch.rb +27 -0
  109. data/lib/karafka/railtie.rb +113 -0
  110. data/lib/karafka/routing/builder.rb +15 -24
  111. data/lib/karafka/routing/consumer_group.rb +11 -19
  112. data/lib/karafka/routing/consumer_mapper.rb +1 -2
  113. data/lib/karafka/routing/router.rb +1 -1
  114. data/lib/karafka/routing/subscription_group.rb +53 -0
  115. data/lib/karafka/routing/subscription_groups_builder.rb +53 -0
  116. data/lib/karafka/routing/topic.rb +61 -24
  117. data/lib/karafka/routing/topics.rb +38 -0
  118. data/lib/karafka/runner.rb +51 -0
  119. data/lib/karafka/serialization/json/deserializer.rb +6 -15
  120. data/lib/karafka/server.rb +67 -26
  121. data/lib/karafka/setup/config.rb +147 -175
  122. data/lib/karafka/status.rb +14 -5
  123. data/lib/karafka/templates/example_consumer.rb.erb +16 -0
  124. data/lib/karafka/templates/karafka.rb.erb +15 -51
  125. data/lib/karafka/time_trackers/base.rb +19 -0
  126. data/lib/karafka/time_trackers/pause.rb +92 -0
  127. data/lib/karafka/time_trackers/poll.rb +65 -0
  128. data/lib/karafka/version.rb +1 -1
  129. data/lib/karafka.rb +38 -17
  130. data.tar.gz.sig +0 -0
  131. metadata +118 -120
  132. metadata.gz.sig +0 -0
  133. data/MIT-LICENCE +0 -18
  134. data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
  135. data/lib/karafka/attributes_map.rb +0 -63
  136. data/lib/karafka/backends/inline.rb +0 -16
  137. data/lib/karafka/base_responder.rb +0 -226
  138. data/lib/karafka/cli/flow.rb +0 -48
  139. data/lib/karafka/cli/missingno.rb +0 -19
  140. data/lib/karafka/code_reloader.rb +0 -67
  141. data/lib/karafka/connection/api_adapter.rb +0 -158
  142. data/lib/karafka/connection/batch_delegator.rb +0 -55
  143. data/lib/karafka/connection/builder.rb +0 -23
  144. data/lib/karafka/connection/message_delegator.rb +0 -36
  145. data/lib/karafka/consumers/batch_metadata.rb +0 -10
  146. data/lib/karafka/consumers/callbacks.rb +0 -71
  147. data/lib/karafka/consumers/includer.rb +0 -64
  148. data/lib/karafka/consumers/responders.rb +0 -24
  149. data/lib/karafka/consumers/single_params.rb +0 -15
  150. data/lib/karafka/contracts/responder_usage.rb +0 -54
  151. data/lib/karafka/fetcher.rb +0 -42
  152. data/lib/karafka/helpers/class_matcher.rb +0 -88
  153. data/lib/karafka/helpers/config_retriever.rb +0 -46
  154. data/lib/karafka/helpers/inflector.rb +0 -26
  155. data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
  156. data/lib/karafka/params/batch_metadata.rb +0 -26
  157. data/lib/karafka/params/builders/batch_metadata.rb +0 -30
  158. data/lib/karafka/params/builders/params.rb +0 -38
  159. data/lib/karafka/params/builders/params_batch.rb +0 -25
  160. data/lib/karafka/params/params_batch.rb +0 -60
  161. data/lib/karafka/patches/ruby_kafka.rb +0 -47
  162. data/lib/karafka/persistence/client.rb +0 -29
  163. data/lib/karafka/persistence/consumers.rb +0 -45
  164. data/lib/karafka/persistence/topics.rb +0 -48
  165. data/lib/karafka/responders/builder.rb +0 -36
  166. data/lib/karafka/responders/topic.rb +0 -55
  167. data/lib/karafka/routing/topic_mapper.rb +0 -53
  168. data/lib/karafka/serialization/json/serializer.rb +0 -31
  169. data/lib/karafka/setup/configurators/water_drop.rb +0 -36
  170. data/lib/karafka/templates/application_responder.rb.erb +0 -11
@@ -2,69 +2,277 @@
2
2
 
3
3
  module Karafka
4
4
  module Connection
5
- # A single listener that listens to incoming messages from a single route
6
- # @note It does not loop on itself - it needs to be executed in a loop
7
- # @note Listener itself does nothing with the message - it will return to the block
8
- # a raw Kafka::FetchedMessage
5
+ # A single listener that listens to incoming messages from a single subscription group.
6
+ # It polls the messages and then enqueues jobs. It also takes care of potential recovery from
7
+ # critical errors by restarting everything in a safe manner.
8
+ #
9
+ # This is the heart of the consumption process.
9
10
  class Listener
10
- # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group that holds details
11
- # on what topics and with what settings should we listen
11
+ include Helpers::Async
12
+
13
+ # Can be useful for logging
14
+ # @return [String] id of this listener
15
+ attr_reader :id
16
+
17
+ # @param subscription_group [Karafka::Routing::SubscriptionGroup]
18
+ # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
12
19
  # @return [Karafka::Connection::Listener] listener instance
13
- def initialize(consumer_group)
14
- @consumer_group = consumer_group
20
+ def initialize(subscription_group, jobs_queue)
21
+ proc_config = ::Karafka::App.config.internal.processing
22
+
23
+ @id = SecureRandom.uuid
24
+ @subscription_group = subscription_group
25
+ @jobs_queue = jobs_queue
26
+ @coordinators = Processing::CoordinatorsBuffer.new
27
+ @client = Client.new(@subscription_group)
28
+ @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
29
+ @jobs_builder = proc_config.jobs_builder
30
+ @partitioner = proc_config.partitioner_class.new(subscription_group)
31
+ # We reference scheduler here as it is much faster than fetching this each time
32
+ @scheduler = proc_config.scheduler
33
+ # We keep one buffer for messages to preserve memory and not allocate extra objects
34
+ # We can do this that way because we always first schedule jobs using messages before we
35
+ # fetch another batch.
36
+ @messages_buffer = MessagesBuffer.new(subscription_group)
37
+ @mutex = Mutex.new
38
+ @stopped = false
15
39
  end
16
40
 
17
- # Runs prefetch callbacks and executes the main listener fetch loop
41
+ # Runs the main listener fetch loop.
42
+ #
43
+ # @note Prefetch callbacks can be used to seek offset or do other things before we actually
44
+ # start consuming data
18
45
  def call
19
46
  Karafka.monitor.instrument(
20
47
  'connection.listener.before_fetch_loop',
21
- consumer_group: @consumer_group,
22
- client: client
48
+ caller: self,
49
+ subscription_group: @subscription_group,
50
+ client: @client
23
51
  )
52
+
24
53
  fetch_loop
25
54
  end
26
55
 
56
+ # Stops the jobs queue, triggers shutdown on all the executors (sync), commits offsets and
57
+ # stops kafka client.
58
+ #
59
+ # @note This method is not private despite being part of the fetch loop because in case of
60
+ # a forceful shutdown, it may be invoked from a separate thread
61
+ #
62
+ # @note We wrap it with a mutex exactly because of the above case of forceful shutdown
63
+ def shutdown
64
+ return if @stopped
65
+
66
+ @mutex.synchronize do
67
+ @stopped = true
68
+ @executors.clear
69
+ @coordinators.reset
70
+ @client.commit_offsets!
71
+ @client.stop
72
+ end
73
+ end
74
+
27
75
  private
28
76
 
29
- # Opens connection, gets messages and calls a block for each of the incoming messages
77
+ # Fetches the data and adds it to the jobs queue.
78
+ #
30
79
  # @note We catch all the errors here, so they don't affect other listeners (or this one)
31
80
  # so we will be able to listen and consume other incoming messages.
32
- # Since it is run inside Karafka::Connection::ActorCluster - catching all the exceptions
33
- # won't crash the whole cluster. Here we mostly focus on catching the exceptions related to
81
+ # Since it is run inside a Karafka::Runner thread - catching all the exceptions
82
+ # won't crash the whole process. Here we mostly focus on catching the exceptions related to
34
83
  # Kafka connections / Internet connection issues / Etc. Business logic problems should not
35
- # propagate this far
84
+ # propagate this far.
36
85
  def fetch_loop
37
- # @note What happens here is a delegation of processing to a proper processor based
38
- # on the incoming messages characteristics
39
- client.fetch_loop do |raw_data, type|
40
- Karafka.monitor.instrument('connection.listener.fetch_loop')
41
-
42
- case type
43
- when :message
44
- MessageDelegator.call(@consumer_group.id, raw_data)
45
- when :batch
46
- BatchDelegator.call(@consumer_group.id, raw_data)
86
+ until Karafka::App.stopping?
87
+ Karafka.monitor.instrument(
88
+ 'connection.listener.fetch_loop',
89
+ caller: self,
90
+ client: @client
91
+ )
92
+
93
+ resume_paused_partitions
94
+
95
+ Karafka.monitor.instrument(
96
+ 'connection.listener.fetch_loop.received',
97
+ caller: self,
98
+ messages_buffer: @messages_buffer
99
+ ) do
100
+ # We need to fetch data before we revoke lost partitions details as during the polling
101
+ # the callbacks for tracking lost partitions are triggered. Otherwise we would be
102
+ # always one batch behind.
103
+ poll_and_remap_messages
47
104
  end
105
+
106
+ # If there were revoked partitions, we need to wait on their jobs to finish before
107
+ # distributing consuming jobs as upon revoking, we might get assigned to the same
108
+ # partitions, thus getting their jobs. The revoking jobs need to finish before
109
+ # appropriate consumers are taken down and re-created
110
+ build_and_schedule_revoke_lost_partitions_jobs
111
+
112
+ # We wait only on jobs from our subscription group. Other groups are independent.
113
+ # This will block on revoked jobs until they are finished. Those are not meant to last
114
+ # long and should not have any bigger impact on the system. Doing this in a blocking way
115
+ # simplifies the overall design and prevents race conditions
116
+ wait
117
+
118
+ build_and_schedule_consumption_jobs
119
+
120
+ wait
121
+
122
+ # We don't use the `#commit_offsets!` here for performance reasons. This can be achieved
123
+ # if needed by using manual offset management.
124
+ @client.commit_offsets
48
125
  end
126
+
127
+ # If we are stopping we will no longer schedule any jobs despite polling.
128
+ # We need to keep polling not to exceed the `max.poll.interval` for long-running
129
+ # non-blocking jobs and we need to allow them to finish. We however do not want to
130
+ # enqueue any new jobs. It's worth keeping in mind that it is the end user responsibility
131
+ # to detect shutdown in their long-running logic or else Karafka will force shutdown
132
+ # after a while.
133
+ #
134
+ # We do not care about resuming any partitions or lost jobs as we do not plan to do
135
+ # anything with them as we're in the shutdown phase.
136
+ wait_with_poll
137
+
138
+ # We do not want to schedule the shutdown jobs prior to finishing all the jobs
139
+ # (including non-blocking) as there might be a long-running job with a shutdown and then
140
+ # we would run two jobs in parallel for the same executor and consumer. We do not want that
141
+ # as it could create a race-condition.
142
+ build_and_schedule_shutdown_jobs
143
+
144
+ wait_with_poll
145
+
146
+ shutdown
147
+
49
148
  # This is on purpose - see the notes for this method
50
149
  # rubocop:disable Lint/RescueException
51
150
  rescue Exception => e
52
- Karafka.monitor.instrument('connection.listener.fetch_loop.error', caller: self, error: e)
53
151
  # rubocop:enable Lint/RescueException
54
- # We can stop client without a problem, as it will reinitialize itself when running the
55
- # `fetch_loop` again
56
- @client.stop
57
- # We need to clear the consumers cache for current connection when fatal error happens and
58
- # we reset the connection. Otherwise for consumers with manual offset management, the
59
- # persistence might have stored some data that would be reprocessed
60
- Karafka::Persistence::Consumers.clear
61
- sleep(@consumer_group.reconnect_timeout) && retry
152
+ Karafka.monitor.instrument(
153
+ 'error.occurred',
154
+ caller: self,
155
+ error: e,
156
+ type: 'connection.listener.fetch_loop.error'
157
+ )
158
+
159
+ restart
160
+
161
+ sleep(1) && retry
162
+ end
163
+
164
+ # Resumes processing of partitions that were paused due to an error.
165
+ def resume_paused_partitions
166
+ @coordinators.resume do |topic, partition|
167
+ @client.resume(topic, partition)
168
+ end
169
+ end
170
+
171
+ # Enqueues revoking jobs for partitions that were taken away from the running process.
172
+ def build_and_schedule_revoke_lost_partitions_jobs
173
+ revoked_partitions = @client.rebalance_manager.revoked_partitions
174
+
175
+ # Stop early to save on some execution and array allocation
176
+ return if revoked_partitions.empty?
177
+
178
+ jobs = []
179
+
180
+ revoked_partitions.each do |topic, partitions|
181
+ partitions.each do |partition|
182
+ @coordinators.revoke(topic, partition)
183
+
184
+ # There may be a case where we have lost partition of which data we have never
185
+ # processed (if it was assigned and revoked really fast), thus we may not have it
186
+ # here. In cases like this, we do not run a revocation job
187
+ @executors.find_all(topic, partition).each do |executor|
188
+ jobs << @jobs_builder.revoked(executor)
189
+ end
190
+
191
+ # We need to remove all the executors of a given topic partition that we have lost, so
192
+ # next time we pick up it's work, new executors kick in. This may be needed especially
193
+ # for LRJ where we could end up with a race condition
194
+ # This revocation needs to happen after the jobs are scheduled, otherwise they would
195
+ # be scheduled with new executors instead of old
196
+ @executors.revoke(topic, partition)
197
+ end
198
+ end
199
+
200
+ @scheduler.schedule_revocation(@jobs_queue, jobs)
201
+ end
202
+
203
+ # Enqueues the shutdown jobs for all the executors that exist in our subscription group
204
+ def build_and_schedule_shutdown_jobs
205
+ jobs = []
206
+
207
+ @executors.each do |_, _, executor|
208
+ jobs << @jobs_builder.shutdown(executor)
209
+ end
210
+
211
+ @scheduler.schedule_shutdown(@jobs_queue, jobs)
212
+ end
213
+
214
+ # Polls messages within the time and amount boundaries defined in the settings and then
215
+ # builds karafka messages based on the raw rdkafka messages buffer returned by the
216
+ # `#batch_poll` method.
217
+ #
218
+ # @note There are two buffers, one for raw messages and one for "built" karafka messages
219
+ def poll_and_remap_messages
220
+ @messages_buffer.remap(
221
+ @client.batch_poll
222
+ )
223
+ end
224
+
225
+ # Takes the messages per topic partition and enqueues processing jobs in threads using
226
+ # given scheduler.
227
+ def build_and_schedule_consumption_jobs
228
+ return if @messages_buffer.empty?
229
+
230
+ jobs = []
231
+
232
+ @messages_buffer.each do |topic, partition, messages|
233
+ coordinator = @coordinators.find_or_create(topic, partition)
234
+
235
+ # Start work coordination for this topic partition
236
+ coordinator.start(messages)
237
+
238
+ @partitioner.call(topic, messages) do |group_id, partition_messages|
239
+ # Count the job we're going to create here
240
+ coordinator.increment
241
+
242
+ executor = @executors.find_or_create(topic, partition, group_id)
243
+
244
+ jobs << @jobs_builder.consume(executor, partition_messages, coordinator)
245
+ end
246
+ end
247
+
248
+ @scheduler.schedule_consumption(@jobs_queue, jobs)
249
+ end
250
+
251
+ # Waits for all the jobs from a given subscription group to finish before moving forward
252
+ def wait
253
+ @jobs_queue.wait(@subscription_group.id)
254
+ end
255
+
256
+ # Waits without blocking the polling
257
+ # This should be used only when we no longer plan to use any incoming data and we can safely
258
+ # discard it
259
+ def wait_with_poll
260
+ @client.batch_poll until @jobs_queue.empty?(@subscription_group.id)
62
261
  end
63
262
 
64
- # @return [Karafka::Connection::Client] wrapped kafka consuming client for a given topic
65
- # consumption
66
- def client
67
- @client ||= Client.new(@consumer_group)
263
+ # We can stop client without a problem, as it will reinitialize itself when running the
264
+ # `#fetch_loop` again. We just need to remember to also reset the runner as it is a long
265
+ # running one, so with a new connection to Kafka, we need to initialize the state of the
266
+ # runner and underlying consumers once again.
267
+ def restart
268
+ # If there was any problem with processing, before we reset things we need to make sure,
269
+ # there are no jobs in the queue. Otherwise it could lead to leakage in between client
270
+ # resetting.
271
+ @jobs_queue.wait(@subscription_group.id)
272
+ @jobs_queue.clear(@subscription_group.id)
273
+ @client.reset
274
+ @coordinators.reset
275
+ @executors = Processing::ExecutorsBuffer.new(@client, @subscription_group)
68
276
  end
69
277
  end
70
278
  end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Connection
5
+ # Abstraction layer around listeners batch.
6
+ class ListenersBatch
7
+ include Enumerable
8
+
9
+ # @param jobs_queue [JobsQueue]
10
+ # @return [ListenersBatch]
11
+ def initialize(jobs_queue)
12
+ @batch = App.subscription_groups.map do |subscription_group|
13
+ Connection::Listener.new(subscription_group, jobs_queue)
14
+ end
15
+ end
16
+
17
+ # Iterates over available listeners and yields each listener
18
+ # @param block [Proc] block we want to run
19
+ def each(&block)
20
+ @batch.each(&block)
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Connection
5
+ # Buffer used to build and store karafka messages built based on raw librdkafka messages.
6
+ #
7
+ # Why do we have two buffers? `RawMessagesBuffer` is used to store raw messages and to handle
8
+ # cases related to partition revocation and reconnections. It is "internal" to the listening
9
+ # process. `MessagesBuffer` on the other hand is used to "translate" those raw messages that
10
+ # we know that are ok into Karafka messages and to simplify further work with them.
11
+ #
12
+ # While it adds a bit of overhead, it makes conceptual things much easier and it adds only two
13
+ # simple hash iterations over messages batch.
14
+ #
15
+ # @note This buffer is NOT thread safe. We do not worry about it as we do not use it outside
16
+ # of the main listener loop. It can be cleared after the jobs are scheduled with messages
17
+ # it stores, because messages arrays are not "cleared" in any way directly and their
18
+ # reference stays.
19
+ class MessagesBuffer
20
+ attr_reader :size
21
+
22
+ # @param subscription_group [Karafka::Routing::SubscriptionGroup]
23
+ def initialize(subscription_group)
24
+ @subscription_group = subscription_group
25
+ @size = 0
26
+ @groups = Hash.new do |topic_groups, topic|
27
+ topic_groups[topic] = Hash.new do |partition_groups, partition|
28
+ partition_groups[partition] = []
29
+ end
30
+ end
31
+ end
32
+
33
+ # Remaps raw messages from the raw messages buffer to Karafka messages
34
+ # @param raw_messages_buffer [RawMessagesBuffer] buffer with raw messages
35
+ def remap(raw_messages_buffer)
36
+ clear unless @size.zero?
37
+
38
+ # Since it happens "right after" we've received the messages, it is close enough in time
39
+ # to be used as the moment we received messages.
40
+ received_at = Time.now
41
+
42
+ raw_messages_buffer.each do |topic, partition, messages|
43
+ @size += messages.count
44
+
45
+ ktopic = @subscription_group.topics.find(topic)
46
+
47
+ @groups[topic][partition] = messages.map do |message|
48
+ Messages::Builders::Message.call(
49
+ message,
50
+ ktopic,
51
+ received_at
52
+ )
53
+ end
54
+ end
55
+ end
56
+
57
+ # Allows iterating over the messages of all the topics and partitions
58
+ #
59
+ # @yieldparam [String] topic name
60
+ # @yieldparam [Integer] partition number
61
+ # @yieldparam [Array<Karafka::Messages::Message>] messages from a given topic partition
62
+ def each
63
+ @groups.each do |topic, partitions|
64
+ partitions.each do |partition, messages|
65
+ yield(topic, partition, messages)
66
+ end
67
+ end
68
+ end
69
+
70
+ # @return [Boolean] is the buffer empty or does it contain any messages
71
+ def empty?
72
+ @size.zero?
73
+ end
74
+
75
+ private
76
+
77
+ # Clears the buffer completely
78
+ def clear
79
+ @size = 0
80
+ @groups.clear
81
+ end
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Connection
5
+ # Partitions pauses management abstraction layer.
6
+ # It aggregates all the pauses for all the partitions that we're working with.
7
+ class PausesManager
8
+ # @return [Karafka::Connection::PausesManager] pauses manager
9
+ def initialize
10
+ @pauses = Hash.new do |h, k|
11
+ h[k] = {}
12
+ end
13
+ end
14
+
15
+ # Creates or fetches pause tracker of a given topic partition.
16
+ #
17
+ # @param topic [String] topic name
18
+ # @param partition [Integer] partition number
19
+ # @return [Karafka::TimeTrackers::Pause] pause tracker instance
20
+ def fetch(topic, partition)
21
+ @pauses[topic][partition] ||= TimeTrackers::Pause.new(
22
+ timeout: Karafka::App.config.pause_timeout,
23
+ max_timeout: Karafka::App.config.pause_max_timeout,
24
+ exponential_backoff: Karafka::App.config.pause_with_exponential_backoff
25
+ )
26
+ end
27
+
28
+ # Resumes processing of partitions for which pause time has ended.
29
+ #
30
+ # @yieldparam [String] topic name
31
+ # @yieldparam [Integer] partition number
32
+ def resume
33
+ @pauses.each do |topic, partitions|
34
+ partitions.each do |partition, pause|
35
+ next unless pause.paused?
36
+ next unless pause.expired?
37
+
38
+ pause.resume
39
+
40
+ yield(topic, partition)
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,101 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Connection
5
+ # Buffer for raw librdkafka messages.
6
+ #
7
+ # When message is added to this buffer, it gets assigned to an array with other messages from
8
+ # the same topic and partition.
9
+ #
10
+ # @note This buffer is NOT threadsafe.
11
+ #
12
+ # @note We store data here in groups per topic partition to handle the revocation case, where
13
+ # we may need to remove messages from a single topic partition.
14
+ class RawMessagesBuffer
15
+ attr_reader :size
16
+
17
+ # @return [Karafka::Connection::RawMessagesBuffer] buffer instance
18
+ def initialize
19
+ @size = 0
20
+ @groups = Hash.new do |topic_groups, topic|
21
+ topic_groups[topic] = Hash.new do |partition_groups, partition|
22
+ partition_groups[partition] = []
23
+ end
24
+ end
25
+ end
26
+
27
+ # Adds a message to the buffer.
28
+ #
29
+ # @param message [Rdkafka::Consumer::Message] raw rdkafka message
30
+ # @return [Array<Rdkafka::Consumer::Message>] given partition topic sub-buffer array
31
+ def <<(message)
32
+ @size += 1
33
+ @groups[message.topic][message.partition] << message
34
+ end
35
+
36
+ # Allows iterating over the messages of all the topics and partitions
37
+ #
38
+ # @yieldparam [String] topic name
39
+ # @yieldparam [Integer] partition number
40
+ # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
41
+ def each
42
+ @groups.each do |topic, partitions|
43
+ partitions.each do |partition, messages|
44
+ yield(topic, partition, messages)
45
+ end
46
+ end
47
+ end
48
+
49
+ # Removes given topic and partition data out of the buffer
50
+ # This is used when there's a partition revocation
51
+ # @param topic [String] topic we're interested in
52
+ # @param partition [Integer] partition of which data we want to remove
53
+ def delete(topic, partition)
54
+ return unless @groups.key?(topic)
55
+ return unless @groups.fetch(topic).key?(partition)
56
+
57
+ topic_data = @groups.fetch(topic)
58
+ topic_data.delete(partition)
59
+
60
+ recount!
61
+
62
+ # If there are no more partitions to handle in a given topic, remove it completely
63
+ @groups.delete(topic) if topic_data.empty?
64
+ end
65
+
66
+ # Removes duplicated messages from the same partitions
67
+ # This should be used only when a rebalance occurs, as we may again receive data we already
68
+ # have due to the processing from the last offset. In cases like this, we may get the same
69
+ # data again and we want to ensure as few duplications as possible
70
+ def uniq!
71
+ @groups.each_value do |partitions|
72
+ partitions.each_value do |messages|
73
+ messages.uniq!(&:offset)
74
+ end
75
+ end
76
+
77
+ recount!
78
+ end
79
+
80
+ # Removes all the data from the buffer.
81
+ #
82
+ # @note We do not clear the whole groups hash but rather we clear the partition hashes, so
83
+ # we save ourselves some object allocations. We cannot clear the underlying arrays as they
84
+ # may be used in other threads for data processing, thus if we would clear it, we could
85
+ # potentially clear a raw messages array for a job that is in the jobs queue.
86
+ def clear
87
+ @size = 0
88
+ @groups.each_value(&:clear)
89
+ end
90
+
91
+ private
92
+
93
+ # Updates the messages count if we performed any operations that could change the state
94
+ def recount!
95
+ @size = @groups.each_value.sum do |partitions|
96
+ partitions.each_value.map(&:count).sum
97
+ end
98
+ end
99
+ end
100
+ end
101
+ end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Connection
5
+ # Manager for tracking changes in the partitions assignment.
6
+ #
7
+ # We need tracking of those to clean up consumers that will no longer process given partitions
8
+ # as they were taken away.
9
+ #
10
+ # @note Since this does not happen really often, we try to stick with same objects for the
11
+ # empty states most of the time, so we don't create many objects during the manager life
12
+ #
13
+ # @note Internally in the rebalance manager we have a notion of lost partitions. Partitions
14
+ # that are lost, are those that got revoked but did not get re-assigned back. We do not
15
+ # expose this concept outside and we normalize to have them revoked, as it is irrelevant
16
+ # from the perspective of the rest of the code, as only those that are lost are truly revoked.
17
+ class RebalanceManager
18
+ # Empty array for internal usage not to create new objects
19
+ EMPTY_ARRAY = [].freeze
20
+
21
+ attr_reader :assigned_partitions, :revoked_partitions
22
+
23
+ private_constant :EMPTY_ARRAY
24
+
25
+ # @return [RebalanceManager]
26
+ def initialize
27
+ @assigned_partitions = {}
28
+ @revoked_partitions = {}
29
+ @changed = false
30
+ end
31
+
32
+ # Resets the rebalance manager state
33
+ # This needs to be done before each polling loop as during the polling, the state may be
34
+ # changed
35
+ def clear
36
+ @assigned_partitions.clear
37
+ @revoked_partitions.clear
38
+ @changed = false
39
+ end
40
+
41
+ # @return [Boolean] indicates a state change in the partitions assignment
42
+ def changed?
43
+ @changed
44
+ end
45
+
46
+ # Callback that kicks in inside of rdkafka, when new partitions are assigned.
47
+ #
48
+ # @private
49
+ # @param _ [Rdkafka::Consumer]
50
+ # @param partitions [Rdkafka::Consumer::TopicPartitionList]
51
+ def on_partitions_assigned(_, partitions)
52
+ @assigned_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
53
+ @changed = true
54
+ end
55
+
56
+ # Callback that kicks in inside of rdkafka, when partitions are revoked.
57
+ #
58
+ # @private
59
+ # @param _ [Rdkafka::Consumer]
60
+ # @param partitions [Rdkafka::Consumer::TopicPartitionList]
61
+ def on_partitions_revoked(_, partitions)
62
+ @revoked_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
63
+ @changed = true
64
+ end
65
+
66
+ # We consider as lost only partitions that were taken away and not re-assigned back to us
67
+ def lost_partitions
68
+ lost_partitions = {}
69
+
70
+ revoked_partitions.each do |topic, partitions|
71
+ lost_partitions[topic] = partitions - assigned_partitions.fetch(topic, EMPTY_ARRAY)
72
+ end
73
+
74
+ lost_partitions
75
+ end
76
+ end
77
+ end
78
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Contracts
5
+ # Base contract for all Karafka contracts
6
+ class Base < ::Karafka::Core::Contractable::Contract
7
+ # @param data [Hash] data for validation
8
+ # @return [Boolean] true if all good
9
+ # @raise [Errors::InvalidConfigurationError] invalid configuration error
10
+ # @note We use contracts only in the config validation context, so no need to add support
11
+ # for multiple error classes. It will be added when needed.
12
+ def validate!(data)
13
+ super(data, Errors::InvalidConfigurationError)
14
+ end
15
+ end
16
+ end
17
+ end