karafka 1.4.13 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (170) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +3 -3
  3. data/.github/workflows/ci.yml +85 -30
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +268 -7
  6. data/CONTRIBUTING.md +10 -19
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +44 -87
  9. data/LICENSE +17 -0
  10. data/LICENSE-COMM +89 -0
  11. data/LICENSE-LGPL +165 -0
  12. data/README.md +44 -48
  13. data/bin/benchmarks +85 -0
  14. data/bin/create_token +22 -0
  15. data/bin/integrations +237 -0
  16. data/bin/karafka +4 -0
  17. data/bin/scenario +29 -0
  18. data/bin/stress_many +13 -0
  19. data/bin/stress_one +13 -0
  20. data/bin/wait_for_kafka +20 -0
  21. data/certs/karafka-pro.pem +11 -0
  22. data/config/errors.yml +55 -40
  23. data/docker-compose.yml +39 -3
  24. data/karafka.gemspec +11 -17
  25. data/lib/active_job/karafka.rb +21 -0
  26. data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
  27. data/lib/karafka/active_job/consumer.rb +26 -0
  28. data/lib/karafka/active_job/dispatcher.rb +38 -0
  29. data/lib/karafka/active_job/job_extensions.rb +34 -0
  30. data/lib/karafka/active_job/job_options_contract.rb +21 -0
  31. data/lib/karafka/active_job/routing/extensions.rb +31 -0
  32. data/lib/karafka/app.rb +15 -20
  33. data/lib/karafka/base_consumer.rb +181 -31
  34. data/lib/karafka/cli/base.rb +4 -4
  35. data/lib/karafka/cli/info.rb +43 -9
  36. data/lib/karafka/cli/install.rb +19 -10
  37. data/lib/karafka/cli/server.rb +17 -42
  38. data/lib/karafka/cli.rb +4 -11
  39. data/lib/karafka/connection/client.rb +385 -90
  40. data/lib/karafka/connection/listener.rb +246 -38
  41. data/lib/karafka/connection/listeners_batch.rb +24 -0
  42. data/lib/karafka/connection/messages_buffer.rb +84 -0
  43. data/lib/karafka/connection/pauses_manager.rb +46 -0
  44. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  45. data/lib/karafka/connection/rebalance_manager.rb +78 -0
  46. data/lib/karafka/contracts/base.rb +17 -0
  47. data/lib/karafka/contracts/config.rb +88 -11
  48. data/lib/karafka/contracts/consumer_group.rb +21 -189
  49. data/lib/karafka/contracts/consumer_group_topic.rb +34 -11
  50. data/lib/karafka/contracts/server_cli_options.rb +19 -18
  51. data/lib/karafka/contracts.rb +1 -1
  52. data/lib/karafka/env.rb +46 -0
  53. data/lib/karafka/errors.rb +21 -21
  54. data/lib/karafka/helpers/async.rb +33 -0
  55. data/lib/karafka/helpers/colorize.rb +20 -0
  56. data/lib/karafka/helpers/multi_delegator.rb +2 -2
  57. data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
  58. data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
  59. data/lib/karafka/instrumentation/logger_listener.rb +164 -0
  60. data/lib/karafka/instrumentation/monitor.rb +13 -61
  61. data/lib/karafka/instrumentation/notifications.rb +52 -0
  62. data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
  63. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  64. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  65. data/lib/karafka/instrumentation.rb +21 -0
  66. data/lib/karafka/licenser.rb +75 -0
  67. data/lib/karafka/messages/batch_metadata.rb +45 -0
  68. data/lib/karafka/messages/builders/batch_metadata.rb +40 -0
  69. data/lib/karafka/messages/builders/message.rb +39 -0
  70. data/lib/karafka/messages/builders/messages.rb +32 -0
  71. data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
  72. data/lib/karafka/messages/messages.rb +64 -0
  73. data/lib/karafka/{params → messages}/metadata.rb +4 -6
  74. data/lib/karafka/messages/seek.rb +9 -0
  75. data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
  76. data/lib/karafka/pro/active_job/consumer.rb +46 -0
  77. data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
  78. data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
  79. data/lib/karafka/pro/base_consumer.rb +82 -0
  80. data/lib/karafka/pro/contracts/base.rb +21 -0
  81. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  82. data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
  83. data/lib/karafka/pro/loader.rb +76 -0
  84. data/lib/karafka/pro/performance_tracker.rb +80 -0
  85. data/lib/karafka/pro/processing/coordinator.rb +72 -0
  86. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
  87. data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
  88. data/lib/karafka/pro/processing/partitioner.rb +60 -0
  89. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  90. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  91. data/lib/karafka/pro/routing/topic_extensions.rb +38 -0
  92. data/lib/karafka/pro.rb +13 -0
  93. data/lib/karafka/process.rb +1 -0
  94. data/lib/karafka/processing/coordinator.rb +88 -0
  95. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  96. data/lib/karafka/processing/executor.rb +118 -0
  97. data/lib/karafka/processing/executors_buffer.rb +88 -0
  98. data/lib/karafka/processing/jobs/base.rb +51 -0
  99. data/lib/karafka/processing/jobs/consume.rb +42 -0
  100. data/lib/karafka/processing/jobs/revoked.rb +22 -0
  101. data/lib/karafka/processing/jobs/shutdown.rb +23 -0
  102. data/lib/karafka/processing/jobs_builder.rb +29 -0
  103. data/lib/karafka/processing/jobs_queue.rb +144 -0
  104. data/lib/karafka/processing/partitioner.rb +22 -0
  105. data/lib/karafka/processing/result.rb +29 -0
  106. data/lib/karafka/processing/scheduler.rb +22 -0
  107. data/lib/karafka/processing/worker.rb +88 -0
  108. data/lib/karafka/processing/workers_batch.rb +27 -0
  109. data/lib/karafka/railtie.rb +113 -0
  110. data/lib/karafka/routing/builder.rb +15 -24
  111. data/lib/karafka/routing/consumer_group.rb +11 -19
  112. data/lib/karafka/routing/consumer_mapper.rb +1 -2
  113. data/lib/karafka/routing/router.rb +1 -1
  114. data/lib/karafka/routing/subscription_group.rb +53 -0
  115. data/lib/karafka/routing/subscription_groups_builder.rb +53 -0
  116. data/lib/karafka/routing/topic.rb +61 -24
  117. data/lib/karafka/routing/topics.rb +38 -0
  118. data/lib/karafka/runner.rb +51 -0
  119. data/lib/karafka/serialization/json/deserializer.rb +6 -15
  120. data/lib/karafka/server.rb +67 -26
  121. data/lib/karafka/setup/config.rb +147 -175
  122. data/lib/karafka/status.rb +14 -5
  123. data/lib/karafka/templates/example_consumer.rb.erb +16 -0
  124. data/lib/karafka/templates/karafka.rb.erb +15 -51
  125. data/lib/karafka/time_trackers/base.rb +19 -0
  126. data/lib/karafka/time_trackers/pause.rb +92 -0
  127. data/lib/karafka/time_trackers/poll.rb +65 -0
  128. data/lib/karafka/version.rb +1 -1
  129. data/lib/karafka.rb +38 -17
  130. data.tar.gz.sig +0 -0
  131. metadata +118 -120
  132. metadata.gz.sig +0 -0
  133. data/MIT-LICENCE +0 -18
  134. data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
  135. data/lib/karafka/attributes_map.rb +0 -63
  136. data/lib/karafka/backends/inline.rb +0 -16
  137. data/lib/karafka/base_responder.rb +0 -226
  138. data/lib/karafka/cli/flow.rb +0 -48
  139. data/lib/karafka/cli/missingno.rb +0 -19
  140. data/lib/karafka/code_reloader.rb +0 -67
  141. data/lib/karafka/connection/api_adapter.rb +0 -158
  142. data/lib/karafka/connection/batch_delegator.rb +0 -55
  143. data/lib/karafka/connection/builder.rb +0 -23
  144. data/lib/karafka/connection/message_delegator.rb +0 -36
  145. data/lib/karafka/consumers/batch_metadata.rb +0 -10
  146. data/lib/karafka/consumers/callbacks.rb +0 -71
  147. data/lib/karafka/consumers/includer.rb +0 -64
  148. data/lib/karafka/consumers/responders.rb +0 -24
  149. data/lib/karafka/consumers/single_params.rb +0 -15
  150. data/lib/karafka/contracts/responder_usage.rb +0 -54
  151. data/lib/karafka/fetcher.rb +0 -42
  152. data/lib/karafka/helpers/class_matcher.rb +0 -88
  153. data/lib/karafka/helpers/config_retriever.rb +0 -46
  154. data/lib/karafka/helpers/inflector.rb +0 -26
  155. data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
  156. data/lib/karafka/params/batch_metadata.rb +0 -26
  157. data/lib/karafka/params/builders/batch_metadata.rb +0 -30
  158. data/lib/karafka/params/builders/params.rb +0 -38
  159. data/lib/karafka/params/builders/params_batch.rb +0 -25
  160. data/lib/karafka/params/params_batch.rb +0 -60
  161. data/lib/karafka/patches/ruby_kafka.rb +0 -47
  162. data/lib/karafka/persistence/client.rb +0 -29
  163. data/lib/karafka/persistence/consumers.rb +0 -45
  164. data/lib/karafka/persistence/topics.rb +0 -48
  165. data/lib/karafka/responders/builder.rb +0 -36
  166. data/lib/karafka/responders/topic.rb +0 -55
  167. data/lib/karafka/routing/topic_mapper.rb +0 -53
  168. data/lib/karafka/serialization/json/serializer.rb +0 -31
  169. data/lib/karafka/setup/configurators/water_drop.rb +0 -36
  170. data/lib/karafka/templates/application_responder.rb.erb +0 -11
@@ -2,69 +2,277 @@
2
2
 
3
3
  module Karafka
4
4
  module Connection
5
- # A single listener that listens to incoming messages from a single route
6
- # @note It does not loop on itself - it needs to be executed in a loop
7
- # @note Listener itself does nothing with the message - it will return to the block
8
- # a raw Kafka::FetchedMessage
5
+ # A single listener that listens to incoming messages from a single subscription group.
6
+ # It polls the messages and then enqueues jobs. It also takes care of potential recovery from
7
+ # critical errors by restarting everything in a safe manner.
8
+ #
9
+ # This is the heart of the consumption process.
9
10
  class Listener
10
- # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group that holds details
11
- # on what topics and with what settings should we listen
11
+ include Helpers::Async
12
+
13
+ # Can be useful for logging
14
+ # @return [String] id of this listener
15
+ attr_reader :id
16
+
17
+ # @param subscription_group [Karafka::Routing::SubscriptionGroup]
18
+ # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
12
19
  # @return [Karafka::Connection::Listener] listener instance
13
- def initialize(consumer_group)
14
- @consumer_group = consumer_group
20
+ def initialize(subscription_group, jobs_queue)
21
+ proc_config = ::Karafka::App.config.internal.processing
22
+
23
+ @id = SecureRandom.uuid
24
+ @subscription_group = subscription_group
25
+ @jobs_queue = jobs_queue
26
+ @coordinators = Processing::CoordinatorsBuffer.new
27
+ @client = Client.new(@subscription_group)
28
+ @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
29
+ @jobs_builder = proc_config.jobs_builder
30
+ @partitioner = proc_config.partitioner_class.new(subscription_group)
31
+ # We reference scheduler here as it is much faster than fetching this each time
32
+ @scheduler = proc_config.scheduler
33
+ # We keep one buffer for messages to preserve memory and not allocate extra objects
34
+ # We can do this that way because we always first schedule jobs using messages before we
35
+ # fetch another batch.
36
+ @messages_buffer = MessagesBuffer.new(subscription_group)
37
+ @mutex = Mutex.new
38
+ @stopped = false
15
39
  end
16
40
 
17
- # Runs prefetch callbacks and executes the main listener fetch loop
41
+ # Runs the main listener fetch loop.
42
+ #
43
+ # @note Prefetch callbacks can be used to seek offset or do other things before we actually
44
+ # start consuming data
18
45
  def call
19
46
  Karafka.monitor.instrument(
20
47
  'connection.listener.before_fetch_loop',
21
- consumer_group: @consumer_group,
22
- client: client
48
+ caller: self,
49
+ subscription_group: @subscription_group,
50
+ client: @client
23
51
  )
52
+
24
53
  fetch_loop
25
54
  end
26
55
 
56
+ # Stops the jobs queue, triggers shutdown on all the executors (sync), commits offsets and
57
+ # stops kafka client.
58
+ #
59
+ # @note This method is not private despite being part of the fetch loop because in case of
60
+ # a forceful shutdown, it may be invoked from a separate thread
61
+ #
62
+ # @note We wrap it with a mutex exactly because of the above case of forceful shutdown
63
+ def shutdown
64
+ return if @stopped
65
+
66
+ @mutex.synchronize do
67
+ @stopped = true
68
+ @executors.clear
69
+ @coordinators.reset
70
+ @client.commit_offsets!
71
+ @client.stop
72
+ end
73
+ end
74
+
27
75
  private
28
76
 
29
- # Opens connection, gets messages and calls a block for each of the incoming messages
77
+ # Fetches the data and adds it to the jobs queue.
78
+ #
30
79
  # @note We catch all the errors here, so they don't affect other listeners (or this one)
31
80
  # so we will be able to listen and consume other incoming messages.
32
- # Since it is run inside Karafka::Connection::ActorCluster - catching all the exceptions
33
- # won't crash the whole cluster. Here we mostly focus on catching the exceptions related to
81
+ # Since it is run inside a Karafka::Runner thread - catching all the exceptions
82
+ # won't crash the whole process. Here we mostly focus on catching the exceptions related to
34
83
  # Kafka connections / Internet connection issues / Etc. Business logic problems should not
35
- # propagate this far
84
+ # propagate this far.
36
85
  def fetch_loop
37
- # @note What happens here is a delegation of processing to a proper processor based
38
- # on the incoming messages characteristics
39
- client.fetch_loop do |raw_data, type|
40
- Karafka.monitor.instrument('connection.listener.fetch_loop')
41
-
42
- case type
43
- when :message
44
- MessageDelegator.call(@consumer_group.id, raw_data)
45
- when :batch
46
- BatchDelegator.call(@consumer_group.id, raw_data)
86
+ until Karafka::App.stopping?
87
+ Karafka.monitor.instrument(
88
+ 'connection.listener.fetch_loop',
89
+ caller: self,
90
+ client: @client
91
+ )
92
+
93
+ resume_paused_partitions
94
+
95
+ Karafka.monitor.instrument(
96
+ 'connection.listener.fetch_loop.received',
97
+ caller: self,
98
+ messages_buffer: @messages_buffer
99
+ ) do
100
+ # We need to fetch data before we revoke lost partitions details as during the polling
101
+ # the callbacks for tracking lost partitions are triggered. Otherwise we would be
102
+ # always one batch behind.
103
+ poll_and_remap_messages
47
104
  end
105
+
106
+ # If there were revoked partitions, we need to wait on their jobs to finish before
107
+ # distributing consuming jobs as upon revoking, we might get assigned to the same
108
+ # partitions, thus getting their jobs. The revoking jobs need to finish before
109
+ # appropriate consumers are taken down and re-created
110
+ build_and_schedule_revoke_lost_partitions_jobs
111
+
112
+ # We wait only on jobs from our subscription group. Other groups are independent.
113
+ # This will block on revoked jobs until they are finished. Those are not meant to last
114
+ # long and should not have any bigger impact on the system. Doing this in a blocking way
115
+ # simplifies the overall design and prevents race conditions
116
+ wait
117
+
118
+ build_and_schedule_consumption_jobs
119
+
120
+ wait
121
+
122
+ # We don't use the `#commit_offsets!` here for performance reasons. This can be achieved
123
+ # if needed by using manual offset management.
124
+ @client.commit_offsets
48
125
  end
126
+
127
+ # If we are stopping we will no longer schedule any jobs despite polling.
128
+ # We need to keep polling not to exceed the `max.poll.interval` for long-running
129
+ # non-blocking jobs and we need to allow them to finish. We however do not want to
130
+ # enqueue any new jobs. It's worth keeping in mind that it is the end user responsibility
131
+ # to detect shutdown in their long-running logic or else Karafka will force shutdown
132
+ # after a while.
133
+ #
134
+ # We do not care about resuming any partitions or lost jobs as we do not plan to do
135
+ # anything with them as we're in the shutdown phase.
136
+ wait_with_poll
137
+
138
+ # We do not want to schedule the shutdown jobs prior to finishing all the jobs
139
+ # (including non-blocking) as there might be a long-running job with a shutdown and then
140
+ # we would run two jobs in parallel for the same executor and consumer. We do not want that
141
+ # as it could create a race-condition.
142
+ build_and_schedule_shutdown_jobs
143
+
144
+ wait_with_poll
145
+
146
+ shutdown
147
+
49
148
  # This is on purpose - see the notes for this method
50
149
  # rubocop:disable Lint/RescueException
51
150
  rescue Exception => e
52
- Karafka.monitor.instrument('connection.listener.fetch_loop.error', caller: self, error: e)
53
151
  # rubocop:enable Lint/RescueException
54
- # We can stop client without a problem, as it will reinitialize itself when running the
55
- # `fetch_loop` again
56
- @client.stop
57
- # We need to clear the consumers cache for current connection when fatal error happens and
58
- # we reset the connection. Otherwise for consumers with manual offset management, the
59
- # persistence might have stored some data that would be reprocessed
60
- Karafka::Persistence::Consumers.clear
61
- sleep(@consumer_group.reconnect_timeout) && retry
152
+ Karafka.monitor.instrument(
153
+ 'error.occurred',
154
+ caller: self,
155
+ error: e,
156
+ type: 'connection.listener.fetch_loop.error'
157
+ )
158
+
159
+ restart
160
+
161
+ sleep(1) && retry
162
+ end
163
+
164
+ # Resumes processing of partitions that were paused due to an error.
165
+ def resume_paused_partitions
166
+ @coordinators.resume do |topic, partition|
167
+ @client.resume(topic, partition)
168
+ end
169
+ end
170
+
171
+ # Enqueues revoking jobs for partitions that were taken away from the running process.
172
+ def build_and_schedule_revoke_lost_partitions_jobs
173
+ revoked_partitions = @client.rebalance_manager.revoked_partitions
174
+
175
+ # Stop early to save on some execution and array allocation
176
+ return if revoked_partitions.empty?
177
+
178
+ jobs = []
179
+
180
+ revoked_partitions.each do |topic, partitions|
181
+ partitions.each do |partition|
182
+ @coordinators.revoke(topic, partition)
183
+
184
+ # There may be a case where we have lost partition of which data we have never
185
+ # processed (if it was assigned and revoked really fast), thus we may not have it
186
+ # here. In cases like this, we do not run a revocation job
187
+ @executors.find_all(topic, partition).each do |executor|
188
+ jobs << @jobs_builder.revoked(executor)
189
+ end
190
+
191
+ # We need to remove all the executors of a given topic partition that we have lost, so
192
+ # next time we pick up it's work, new executors kick in. This may be needed especially
193
+ # for LRJ where we could end up with a race condition
194
+ # This revocation needs to happen after the jobs are scheduled, otherwise they would
195
+ # be scheduled with new executors instead of old
196
+ @executors.revoke(topic, partition)
197
+ end
198
+ end
199
+
200
+ @scheduler.schedule_revocation(@jobs_queue, jobs)
201
+ end
202
+
203
+ # Enqueues the shutdown jobs for all the executors that exist in our subscription group
204
+ def build_and_schedule_shutdown_jobs
205
+ jobs = []
206
+
207
+ @executors.each do |_, _, executor|
208
+ jobs << @jobs_builder.shutdown(executor)
209
+ end
210
+
211
+ @scheduler.schedule_shutdown(@jobs_queue, jobs)
212
+ end
213
+
214
+ # Polls messages within the time and amount boundaries defined in the settings and then
215
+ # builds karafka messages based on the raw rdkafka messages buffer returned by the
216
+ # `#batch_poll` method.
217
+ #
218
+ # @note There are two buffers, one for raw messages and one for "built" karafka messages
219
+ def poll_and_remap_messages
220
+ @messages_buffer.remap(
221
+ @client.batch_poll
222
+ )
223
+ end
224
+
225
+ # Takes the messages per topic partition and enqueues processing jobs in threads using
226
+ # given scheduler.
227
+ def build_and_schedule_consumption_jobs
228
+ return if @messages_buffer.empty?
229
+
230
+ jobs = []
231
+
232
+ @messages_buffer.each do |topic, partition, messages|
233
+ coordinator = @coordinators.find_or_create(topic, partition)
234
+
235
+ # Start work coordination for this topic partition
236
+ coordinator.start(messages)
237
+
238
+ @partitioner.call(topic, messages) do |group_id, partition_messages|
239
+ # Count the job we're going to create here
240
+ coordinator.increment
241
+
242
+ executor = @executors.find_or_create(topic, partition, group_id)
243
+
244
+ jobs << @jobs_builder.consume(executor, partition_messages, coordinator)
245
+ end
246
+ end
247
+
248
+ @scheduler.schedule_consumption(@jobs_queue, jobs)
249
+ end
250
+
251
+ # Waits for all the jobs from a given subscription group to finish before moving forward
252
+ def wait
253
+ @jobs_queue.wait(@subscription_group.id)
254
+ end
255
+
256
+ # Waits without blocking the polling
257
+ # This should be used only when we no longer plan to use any incoming data and we can safely
258
+ # discard it
259
+ def wait_with_poll
260
+ @client.batch_poll until @jobs_queue.empty?(@subscription_group.id)
62
261
  end
63
262
 
64
- # @return [Karafka::Connection::Client] wrapped kafka consuming client for a given topic
65
- # consumption
66
- def client
67
- @client ||= Client.new(@consumer_group)
263
+ # We can stop client without a problem, as it will reinitialize itself when running the
264
+ # `#fetch_loop` again. We just need to remember to also reset the runner as it is a long
265
+ # running one, so with a new connection to Kafka, we need to initialize the state of the
266
+ # runner and underlying consumers once again.
267
+ def restart
268
+ # If there was any problem with processing, before we reset things we need to make sure,
269
+ # there are no jobs in the queue. Otherwise it could lead to leakage in between client
270
+ # resetting.
271
+ @jobs_queue.wait(@subscription_group.id)
272
+ @jobs_queue.clear(@subscription_group.id)
273
+ @client.reset
274
+ @coordinators.reset
275
+ @executors = Processing::ExecutorsBuffer.new(@client, @subscription_group)
68
276
  end
69
277
  end
70
278
  end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Connection
5
+ # Abstraction layer around listeners batch.
6
+ class ListenersBatch
7
+ include Enumerable
8
+
9
+ # @param jobs_queue [JobsQueue]
10
+ # @return [ListenersBatch]
11
+ def initialize(jobs_queue)
12
+ @batch = App.subscription_groups.map do |subscription_group|
13
+ Connection::Listener.new(subscription_group, jobs_queue)
14
+ end
15
+ end
16
+
17
+ # Iterates over available listeners and yields each listener
18
+ # @param block [Proc] block we want to run
19
+ def each(&block)
20
+ @batch.each(&block)
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Connection
5
+ # Buffer used to build and store karafka messages built based on raw librdkafka messages.
6
+ #
7
+ # Why do we have two buffers? `RawMessagesBuffer` is used to store raw messages and to handle
8
+ # cases related to partition revocation and reconnections. It is "internal" to the listening
9
+ # process. `MessagesBuffer` on the other hand is used to "translate" those raw messages that
10
+ # we know that are ok into Karafka messages and to simplify further work with them.
11
+ #
12
+ # While it adds a bit of overhead, it makes conceptual things much easier and it adds only two
13
+ # simple hash iterations over messages batch.
14
+ #
15
+ # @note This buffer is NOT thread safe. We do not worry about it as we do not use it outside
16
+ # of the main listener loop. It can be cleared after the jobs are scheduled with messages
17
+ # it stores, because messages arrays are not "cleared" in any way directly and their
18
+ # reference stays.
19
+ class MessagesBuffer
20
+ attr_reader :size
21
+
22
+ # @param subscription_group [Karafka::Routing::SubscriptionGroup]
23
+ def initialize(subscription_group)
24
+ @subscription_group = subscription_group
25
+ @size = 0
26
+ @groups = Hash.new do |topic_groups, topic|
27
+ topic_groups[topic] = Hash.new do |partition_groups, partition|
28
+ partition_groups[partition] = []
29
+ end
30
+ end
31
+ end
32
+
33
+ # Remaps raw messages from the raw messages buffer to Karafka messages
34
+ # @param raw_messages_buffer [RawMessagesBuffer] buffer with raw messages
35
+ def remap(raw_messages_buffer)
36
+ clear unless @size.zero?
37
+
38
+ # Since it happens "right after" we've received the messages, it is close enough in time
39
+ # to be used as the moment we received messages.
40
+ received_at = Time.now
41
+
42
+ raw_messages_buffer.each do |topic, partition, messages|
43
+ @size += messages.count
44
+
45
+ ktopic = @subscription_group.topics.find(topic)
46
+
47
+ @groups[topic][partition] = messages.map do |message|
48
+ Messages::Builders::Message.call(
49
+ message,
50
+ ktopic,
51
+ received_at
52
+ )
53
+ end
54
+ end
55
+ end
56
+
57
+ # Allows iterating over all the topics and partitions messages
58
+ #
59
+ # @yieldparam [String] topic name
60
+ # @yieldparam [Integer] partition number
61
+ # @yieldparam [Array<Karafka::Messages::Message>] messages from a given topic partition
62
+ def each
63
+ @groups.each do |topic, partitions|
64
+ partitions.each do |partition, messages|
65
+ yield(topic, partition, messages)
66
+ end
67
+ end
68
+ end
69
+
70
+ # @return [Boolean] is the buffer empty or does it contain any messages
71
+ def empty?
72
+ @size.zero?
73
+ end
74
+
75
+ private
76
+
77
+ # Clears the buffer completely
78
+ def clear
79
+ @size = 0
80
+ @groups.clear
81
+ end
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Connection
5
+ # Partitions pauses management abstraction layer.
6
+ # It aggregates all the pauses for all the partitions that we're working with.
7
+ class PausesManager
8
+ # @return [Karafka::Connection::PausesManager] pauses manager
9
+ def initialize
10
+ @pauses = Hash.new do |h, k|
11
+ h[k] = {}
12
+ end
13
+ end
14
+
15
+ # Creates or fetches pause tracker of a given topic partition.
16
+ #
17
+ # @param topic [String] topic name
18
+ # @param partition [Integer] partition number
19
+ # @return [Karafka::TimeTrackers::Pause] pause tracker instance
20
+ def fetch(topic, partition)
21
+ @pauses[topic][partition] ||= TimeTrackers::Pause.new(
22
+ timeout: Karafka::App.config.pause_timeout,
23
+ max_timeout: Karafka::App.config.pause_max_timeout,
24
+ exponential_backoff: Karafka::App.config.pause_with_exponential_backoff
25
+ )
26
+ end
27
+
28
+ # Resumes processing of partitions for which pause time has ended.
29
+ #
30
+ # @yieldparam [String] topic name
31
+ # @yieldparam [Integer] partition number
32
+ def resume
33
+ @pauses.each do |topic, partitions|
34
+ partitions.each do |partition, pause|
35
+ next unless pause.paused?
36
+ next unless pause.expired?
37
+
38
+ pause.resume
39
+
40
+ yield(topic, partition)
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,101 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Connection
5
+ # Buffer for raw librdkafka messages.
6
+ #
7
+ # When message is added to this buffer, it gets assigned to an array with other messages from
8
+ # the same topic and partition.
9
+ #
10
+ # @note This buffer is NOT threadsafe.
11
+ #
12
+ # @note We store data here in groups per topic partition to handle the revocation case, where
13
+ # we may need to remove messages from a single topic partition.
14
+ class RawMessagesBuffer
15
+ attr_reader :size
16
+
17
+ # @return [Karafka::Connection::RawMessagesBuffer] buffer instance
18
+ def initialize
19
+ @size = 0
20
+ @groups = Hash.new do |topic_groups, topic|
21
+ topic_groups[topic] = Hash.new do |partition_groups, partition|
22
+ partition_groups[partition] = []
23
+ end
24
+ end
25
+ end
26
+
27
+ # Adds a message to the buffer.
28
+ #
29
+ # @param message [Rdkafka::Consumer::Message] raw rdkafka message
30
+ # @return [Array<Rdkafka::Consumer::Message>] given partition topic sub-buffer array
31
+ def <<(message)
32
+ @size += 1
33
+ @groups[message.topic][message.partition] << message
34
+ end
35
+
36
+ # Allows iterating over all the topics and partitions messages
37
+ #
38
+ # @yieldparam [String] topic name
39
+ # @yieldparam [Integer] partition number
40
+ # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
41
+ def each
42
+ @groups.each do |topic, partitions|
43
+ partitions.each do |partition, messages|
44
+ yield(topic, partition, messages)
45
+ end
46
+ end
47
+ end
48
+
49
+ # Removes given topic and partition data out of the buffer
50
+ # This is used when there's a partition revocation
51
+ # @param topic [String] topic we're interested in
52
+ # @param partition [Integer] partition of which data we want to remove
53
+ def delete(topic, partition)
54
+ return unless @groups.key?(topic)
55
+ return unless @groups.fetch(topic).key?(partition)
56
+
57
+ topic_data = @groups.fetch(topic)
58
+ topic_data.delete(partition)
59
+
60
+ recount!
61
+
62
+ # If there are no more partitions to handle in a given topic, remove it completely
63
+ @groups.delete(topic) if topic_data.empty?
64
+ end
65
+
66
+ # Removes duplicated messages from the same partitions
67
+ # This should be used only when rebalance occurs, as we may get data again we already have
68
+ # due to the processing from the last offset. In cases like this, we may get same data
69
+ # again and we do want to ensure as few duplications as possible
70
+ def uniq!
71
+ @groups.each_value do |partitions|
72
+ partitions.each_value do |messages|
73
+ messages.uniq!(&:offset)
74
+ end
75
+ end
76
+
77
+ recount!
78
+ end
79
+
80
+ # Removes all the data from the buffer.
81
+ #
82
+ # @note We do not clear the whole groups hash but rather we clear the partition hashes, so
83
+ # we save ourselves some objects allocations. We cannot clear the underlying arrays as they
84
+ # may be used in other threads for data processing, thus if we would clear it, we could
85
+ # potentially clear a raw messages array for a job that is in the jobs queue.
86
+ def clear
87
+ @size = 0
88
+ @groups.each_value(&:clear)
89
+ end
90
+
91
+ private
92
+
93
+ # Updates the messages count if we performed any operations that could change the state
94
+ def recount!
95
+ @size = @groups.each_value.sum do |partitions|
96
+ partitions.each_value.map(&:count).sum
97
+ end
98
+ end
99
+ end
100
+ end
101
+ end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Connection
5
+ # Manager for tracking changes in the partitions assignment.
6
+ #
7
+ # We need tracking of those to clean up consumers that will no longer process given partitions
8
+ # as they were taken away.
9
+ #
10
+ # @note Since this does not happen really often, we try to stick with same objects for the
11
+ # empty states most of the time, so we don't create many objects during the manager life
12
+ #
13
+ # @note Internally in the rebalance manager we have a notion of lost partitions. Partitions
14
+ # that are lost, are those that got revoked but did not get re-assigned back. We do not
15
+ # expose this concept outside and we normalize to have them revoked, as it is irrelevant
16
+ # from the rest of the code perspective as only those that are lost are truly revoked.
17
+ class RebalanceManager
18
+ # Empty array for internal usage not to create new objects
19
+ EMPTY_ARRAY = [].freeze
20
+
21
+ attr_reader :assigned_partitions, :revoked_partitions
22
+
23
+ private_constant :EMPTY_ARRAY
24
+
25
+ # @return [RebalanceManager]
26
+ def initialize
27
+ @assigned_partitions = {}
28
+ @revoked_partitions = {}
29
+ @changed = false
30
+ end
31
+
32
+ # Resets the rebalance manager state
33
+ # This needs to be done before each polling loop as during the polling, the state may be
34
+ # changed
35
+ def clear
36
+ @assigned_partitions.clear
37
+ @revoked_partitions.clear
38
+ @changed = false
39
+ end
40
+
41
+ # @return [Boolean] indicates a state change in the partitions assignment
42
+ def changed?
43
+ @changed
44
+ end
45
+
46
+ # Callback that kicks in inside of rdkafka, when new partitions are assigned.
47
+ #
48
+ # @private
49
+ # @param _ [Rdkafka::Consumer]
50
+ # @param partitions [Rdkafka::Consumer::TopicPartitionList]
51
+ def on_partitions_assigned(_, partitions)
52
+ @assigned_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
53
+ @changed = true
54
+ end
55
+
56
+ # Callback that kicks in inside of rdkafka, when partitions are revoked.
57
+ #
58
+ # @private
59
+ # @param _ [Rdkafka::Consumer]
60
+ # @param partitions [Rdkafka::Consumer::TopicPartitionList]
61
+ def on_partitions_revoked(_, partitions)
62
+ @revoked_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
63
+ @changed = true
64
+ end
65
+
66
+ # We consider as lost only partitions that were taken away and not re-assigned back to us
67
+ def lost_partitions
68
+ lost_partitions = {}
69
+
70
+ revoked_partitions.each do |topic, partitions|
71
+ lost_partitions[topic] = partitions - assigned_partitions.fetch(topic, EMPTY_ARRAY)
72
+ end
73
+
74
+ lost_partitions
75
+ end
76
+ end
77
+ end
78
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Contracts
5
+ # Base contract for all Karafka contracts
6
+ class Base < ::Karafka::Core::Contractable::Contract
7
+ # @param data [Hash] data for validation
8
+ # @return [Boolean] true if all good
9
+ # @raise [Errors::InvalidConfigurationError] invalid configuration error
10
+ # @note We use contracts only in the config validation context, so no need to add support
11
+ # for multiple error classes. It will be added when it will be needed.
12
+ def validate!(data)
13
+ super(data, Errors::InvalidConfigurationError)
14
+ end
15
+ end
16
+ end
17
+ end