karafka 1.4.0 → 2.0.10

Files changed (172)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +89 -18
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +365 -1
  6. data/CONTRIBUTING.md +10 -19
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +56 -112
  9. data/LICENSE +17 -0
  10. data/LICENSE-COMM +89 -0
  11. data/LICENSE-LGPL +165 -0
  12. data/README.md +61 -68
  13. data/bin/benchmarks +85 -0
  14. data/bin/create_token +22 -0
  15. data/bin/integrations +272 -0
  16. data/bin/karafka +10 -0
  17. data/bin/scenario +29 -0
  18. data/bin/stress_many +13 -0
  19. data/bin/stress_one +13 -0
  20. data/certs/cert_chain.pem +26 -0
  21. data/certs/karafka-pro.pem +11 -0
  22. data/config/errors.yml +59 -38
  23. data/docker-compose.yml +10 -3
  24. data/karafka.gemspec +18 -21
  25. data/lib/active_job/karafka.rb +21 -0
  26. data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
  27. data/lib/karafka/active_job/consumer.rb +26 -0
  28. data/lib/karafka/active_job/dispatcher.rb +38 -0
  29. data/lib/karafka/active_job/job_extensions.rb +34 -0
  30. data/lib/karafka/active_job/job_options_contract.rb +21 -0
  31. data/lib/karafka/active_job/routing/extensions.rb +33 -0
  32. data/lib/karafka/admin.rb +63 -0
  33. data/lib/karafka/app.rb +15 -20
  34. data/lib/karafka/base_consumer.rb +197 -31
  35. data/lib/karafka/cli/info.rb +44 -10
  36. data/lib/karafka/cli/install.rb +22 -12
  37. data/lib/karafka/cli/server.rb +17 -42
  38. data/lib/karafka/cli.rb +4 -3
  39. data/lib/karafka/connection/client.rb +379 -89
  40. data/lib/karafka/connection/listener.rb +250 -38
  41. data/lib/karafka/connection/listeners_batch.rb +24 -0
  42. data/lib/karafka/connection/messages_buffer.rb +84 -0
  43. data/lib/karafka/connection/pauses_manager.rb +46 -0
  44. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  45. data/lib/karafka/connection/rebalance_manager.rb +78 -0
  46. data/lib/karafka/contracts/base.rb +17 -0
  47. data/lib/karafka/contracts/config.rb +88 -11
  48. data/lib/karafka/contracts/consumer_group.rb +21 -184
  49. data/lib/karafka/contracts/consumer_group_topic.rb +35 -11
  50. data/lib/karafka/contracts/server_cli_options.rb +19 -18
  51. data/lib/karafka/contracts.rb +1 -1
  52. data/lib/karafka/env.rb +46 -0
  53. data/lib/karafka/errors.rb +21 -21
  54. data/lib/karafka/helpers/async.rb +33 -0
  55. data/lib/karafka/helpers/colorize.rb +20 -0
  56. data/lib/karafka/helpers/multi_delegator.rb +2 -2
  57. data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
  58. data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
  59. data/lib/karafka/instrumentation/logger.rb +6 -10
  60. data/lib/karafka/instrumentation/logger_listener.rb +174 -0
  61. data/lib/karafka/instrumentation/monitor.rb +13 -61
  62. data/lib/karafka/instrumentation/notifications.rb +53 -0
  63. data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
  64. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  65. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  66. data/lib/karafka/instrumentation.rb +21 -0
  67. data/lib/karafka/licenser.rb +75 -0
  68. data/lib/karafka/messages/batch_metadata.rb +45 -0
  69. data/lib/karafka/messages/builders/batch_metadata.rb +39 -0
  70. data/lib/karafka/messages/builders/message.rb +39 -0
  71. data/lib/karafka/messages/builders/messages.rb +34 -0
  72. data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
  73. data/lib/karafka/messages/messages.rb +64 -0
  74. data/lib/karafka/{params → messages}/metadata.rb +4 -6
  75. data/lib/karafka/messages/seek.rb +9 -0
  76. data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
  77. data/lib/karafka/pro/active_job/consumer.rb +46 -0
  78. data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
  79. data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
  80. data/lib/karafka/pro/base_consumer.rb +107 -0
  81. data/lib/karafka/pro/contracts/base.rb +21 -0
  82. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  83. data/lib/karafka/pro/contracts/consumer_group_topic.rb +69 -0
  84. data/lib/karafka/pro/loader.rb +76 -0
  85. data/lib/karafka/pro/performance_tracker.rb +80 -0
  86. data/lib/karafka/pro/processing/coordinator.rb +85 -0
  87. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +38 -0
  88. data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
  89. data/lib/karafka/pro/processing/partitioner.rb +58 -0
  90. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  91. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  92. data/lib/karafka/pro/routing/topic_extensions.rb +74 -0
  93. data/lib/karafka/pro.rb +13 -0
  94. data/lib/karafka/process.rb +1 -0
  95. data/lib/karafka/processing/coordinator.rb +103 -0
  96. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  97. data/lib/karafka/processing/executor.rb +126 -0
  98. data/lib/karafka/processing/executors_buffer.rb +88 -0
  99. data/lib/karafka/processing/jobs/base.rb +55 -0
  100. data/lib/karafka/processing/jobs/consume.rb +47 -0
  101. data/lib/karafka/processing/jobs/revoked.rb +22 -0
  102. data/lib/karafka/processing/jobs/shutdown.rb +23 -0
  103. data/lib/karafka/processing/jobs_builder.rb +29 -0
  104. data/lib/karafka/processing/jobs_queue.rb +144 -0
  105. data/lib/karafka/processing/partitioner.rb +22 -0
  106. data/lib/karafka/processing/result.rb +37 -0
  107. data/lib/karafka/processing/scheduler.rb +22 -0
  108. data/lib/karafka/processing/worker.rb +91 -0
  109. data/lib/karafka/processing/workers_batch.rb +27 -0
  110. data/lib/karafka/railtie.rb +127 -0
  111. data/lib/karafka/routing/builder.rb +26 -23
  112. data/lib/karafka/routing/consumer_group.rb +37 -17
  113. data/lib/karafka/routing/consumer_mapper.rb +1 -2
  114. data/lib/karafka/routing/proxy.rb +9 -16
  115. data/lib/karafka/routing/router.rb +1 -1
  116. data/lib/karafka/routing/subscription_group.rb +53 -0
  117. data/lib/karafka/routing/subscription_groups_builder.rb +54 -0
  118. data/lib/karafka/routing/topic.rb +65 -24
  119. data/lib/karafka/routing/topics.rb +38 -0
  120. data/lib/karafka/runner.rb +51 -0
  121. data/lib/karafka/serialization/json/deserializer.rb +6 -15
  122. data/lib/karafka/server.rb +67 -26
  123. data/lib/karafka/setup/config.rb +153 -175
  124. data/lib/karafka/status.rb +14 -5
  125. data/lib/karafka/templates/example_consumer.rb.erb +16 -0
  126. data/lib/karafka/templates/karafka.rb.erb +17 -55
  127. data/lib/karafka/time_trackers/base.rb +19 -0
  128. data/lib/karafka/time_trackers/pause.rb +92 -0
  129. data/lib/karafka/time_trackers/poll.rb +65 -0
  130. data/lib/karafka/version.rb +1 -1
  131. data/lib/karafka.rb +46 -16
  132. data.tar.gz.sig +0 -0
  133. metadata +145 -171
  134. metadata.gz.sig +0 -0
  135. data/.github/FUNDING.yml +0 -3
  136. data/MIT-LICENCE +0 -18
  137. data/certs/mensfeld.pem +0 -25
  138. data/lib/karafka/attributes_map.rb +0 -62
  139. data/lib/karafka/backends/inline.rb +0 -16
  140. data/lib/karafka/base_responder.rb +0 -226
  141. data/lib/karafka/cli/flow.rb +0 -48
  142. data/lib/karafka/code_reloader.rb +0 -67
  143. data/lib/karafka/connection/api_adapter.rb +0 -161
  144. data/lib/karafka/connection/batch_delegator.rb +0 -55
  145. data/lib/karafka/connection/builder.rb +0 -18
  146. data/lib/karafka/connection/message_delegator.rb +0 -36
  147. data/lib/karafka/consumers/batch_metadata.rb +0 -10
  148. data/lib/karafka/consumers/callbacks.rb +0 -71
  149. data/lib/karafka/consumers/includer.rb +0 -64
  150. data/lib/karafka/consumers/responders.rb +0 -24
  151. data/lib/karafka/consumers/single_params.rb +0 -15
  152. data/lib/karafka/contracts/responder_usage.rb +0 -54
  153. data/lib/karafka/fetcher.rb +0 -42
  154. data/lib/karafka/helpers/class_matcher.rb +0 -88
  155. data/lib/karafka/helpers/config_retriever.rb +0 -46
  156. data/lib/karafka/helpers/inflector.rb +0 -26
  157. data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
  158. data/lib/karafka/params/batch_metadata.rb +0 -26
  159. data/lib/karafka/params/builders/batch_metadata.rb +0 -30
  160. data/lib/karafka/params/builders/params.rb +0 -38
  161. data/lib/karafka/params/builders/params_batch.rb +0 -25
  162. data/lib/karafka/params/params_batch.rb +0 -60
  163. data/lib/karafka/patches/ruby_kafka.rb +0 -47
  164. data/lib/karafka/persistence/client.rb +0 -29
  165. data/lib/karafka/persistence/consumers.rb +0 -45
  166. data/lib/karafka/persistence/topics.rb +0 -48
  167. data/lib/karafka/responders/builder.rb +0 -36
  168. data/lib/karafka/responders/topic.rb +0 -55
  169. data/lib/karafka/routing/topic_mapper.rb +0 -53
  170. data/lib/karafka/serialization/json/serializer.rb +0 -31
  171. data/lib/karafka/setup/configurators/water_drop.rb +0 -36
  172. data/lib/karafka/templates/application_responder.rb.erb +0 -11
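The headline change in this list is the swap of the ruby-kafka based params/responders API for an rdkafka based, messages-oriented API, plus ActiveJob support (see the lib/karafka/active_job and lib/karafka/messages entries above, and the removed lib/karafka/params and lib/karafka/responders ones). Based on the bundled templates (karafka.rb.erb, example_consumer.rb.erb), the 2.0 setup looks roughly like the sketch below; treat it as illustrative, with ExampleConsumer and the broker address as placeholders, not a drop-in upgrade:

# karafka.rb - boot file sketch
class KarafkaApp < Karafka::App
  setup do |config|
    # librdkafka-style settings replace the old ruby-kafka configuration block
    config.kafka = { 'bootstrap.servers': '127.0.0.1:9092' }
    config.client_id = 'example_app'
  end

  routes.draw do
    topic :example do
      consumer ExampleConsumer
    end
  end
end

# app/consumers/example_consumer.rb - #consume with #messages replaces #params_batch
class ExampleConsumer < Karafka::BaseConsumer
  def consume
    messages.each do |message|
      puts message.payload
    end
  end
end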

data/lib/karafka/connection/client.rb
@@ -1,116 +1,406 @@
 # frozen_string_literal: true
 
 module Karafka
+  # Namespace for Kafka connection related logic
   module Connection
-    # Class used as a wrapper around Ruby-Kafka client to simplify additional
-    # features that we provide/might provide in future and to hide the internal implementation
+    # An abstraction layer on top of the rdkafka consumer.
+    #
+    # It is thread-safe and provides safety measures so we won't end up operating on a closed
+    # consumer instance, as that causes the Ruby VM process to crash.
     class Client
-      extend Forwardable
-
-      %i[
-        seek
-        trigger_heartbeat
-        trigger_heartbeat!
-      ].each do |delegated_method|
-        def_delegator :kafka_consumer, delegated_method
-      end
-
-      # Creates a queue consumer client that will pull the data from Kafka
-      # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group for which
-      #   we create a client
-      # @return [Karafka::Connection::Client] group consumer that can subscribe to
-      #   multiple topics
-      def initialize(consumer_group)
-        @consumer_group = consumer_group
-        Persistence::Client.write(self)
-      end
-
-      # Opens connection, gets messages and calls a block for each of the incoming messages
-      # @yieldparam [Array<Kafka::FetchedMessage>, Symbol] kafka response with an info about
-      #   the type of the fetcher that is being used
-      # @note This will yield with raw messages - no preprocessing or reformatting.
-      def fetch_loop
-        settings = ApiAdapter.consumption(consumer_group)
-
-        if consumer_group.batch_fetching
-          kafka_consumer.each_batch(*settings) { |batch| yield(batch, :batch) }
-        else
-          kafka_consumer.each_message(*settings) { |message| yield(message, :message) }
+      attr_reader :rebalance_manager
+
+      # @return [String] underlying consumer name
+      # @note Consumer name may change in case we regenerate it
+      attr_reader :name
+
+      # How many times should we retry polling in case of a failure
+      MAX_POLL_RETRIES = 20
+
+      private_constant :MAX_POLL_RETRIES
+
+      # Creates a new consumer instance.
+      #
+      # @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group
+      #   with all the configuration details needed for us to create a client
+      # @return [Karafka::Connection::Client]
+      def initialize(subscription_group)
+        # Name is set when we build the consumer
+        @name = ''
+        @mutex = Mutex.new
+        @closed = false
+        @subscription_group = subscription_group
+        @buffer = RawMessagesBuffer.new
+        @rebalance_manager = RebalanceManager.new
+        @kafka = build_consumer
+        # Marks whether we have stored any offsets. If we did not store offsets, we should not
+        # commit the offset position as it will crash rdkafka
+        @offsetting = false
+        # We need to keep track of what we have paused for resuming.
+        # In case we lose a partition, we still need to resume it, otherwise it won't be fetched
+        # again if we get reassigned to it later on. We need to keep them, as after revocation we
+        # may no longer be able to fetch them from Kafka. We could rebuild them, but it is easier
+        # to just keep them here and use them if needed when they cannot be obtained
+        @paused_tpls = Hash.new { |h, k| h[k] = {} }
+      end
+
+      # Fetches messages within boundaries defined by the settings (time, size, topics, etc).
+      #
+      # @return [Karafka::Connection::MessagesBuffer] messages buffer that holds messages per
+      #   topic partition
+      # @note This method should not be executed from many threads at the same time
+      def batch_poll
+        time_poll = TimeTrackers::Poll.new(@subscription_group.max_wait_time)
+
+        @buffer.clear
+        @rebalance_manager.clear
+
+        loop do
+          time_poll.start
+
+          # Don't fetch more messages if we do not have any time left
+          break if time_poll.exceeded?
+          # Don't fetch more messages if we've already fetched as many as we wanted
+          break if @buffer.size >= @subscription_group.max_messages
+
+          # Fetch a message within our time boundaries
+          message = poll(time_poll.remaining)
+
+          # Put the message into the buffer if there is one
+          @buffer << message if message
+
+          # Upon polling, the rebalance manager might have been updated.
+          # If partition revocation happens, we need to remove messages from revoked partitions
+          # as well as ensure we do not have duplicates due to the offset reset for partitions
+          # that we got assigned.
+          # We also break early, so the information about the rebalance is used as soon as possible
+          if @rebalance_manager.changed?
+            remove_revoked_and_duplicated_messages
+            break
+          end
+
+          # Track time spent on all of the processing and polling
+          time_poll.checkpoint
+
+          # Finally, once we've (potentially) removed revoked partitions etc., if no messages
+          # were returned, we can break.
+          # Worth keeping in mind that the rebalance manager might have been updated despite no
+          # messages being returned during a poll
+          break unless message
         end
-      # @note We catch only the processing errors as any other are considered critical (exceptions)
-      #   and should require a client restart with a backoff
-      rescue Kafka::ProcessingError => e
-        # If there was an error during consumption, we have to log it, pause current partition
-        # and process other things
-        Karafka.monitor.instrument(
-          'connection.client.fetch_loop.error',
-          caller: self,
-          error: e.cause
-        )
-        pause(e.topic, e.partition)
-        retry
+
+        @buffer
+      end
+
+      # Stores the offset for a given partition of a given topic based on the provided message.
+      #
+      # @param message [Karafka::Messages::Message]
+      def store_offset(message)
+        @mutex.synchronize do
+          internal_store_offset(message)
+        end
+      end
+
+      # Commits the offset on the current consumer in a non-blocking or blocking way.
+      # Ignores the case where there would be no offset (for example when a rebalance occurs).
+      #
+      # @param async [Boolean] should the commit happen async or sync (async by default)
+      # @return [Boolean] whether committing was successful. It may not be, when we no longer
+      #   own the given partition.
+      #
+      # @note This will commit all the offsets for the whole consumer. In order to achieve
+      #   granular control over where the offset should be for particular topic partitions,
+      #   store_offset should be used to only store a new offset when we want it to be flushed
+      def commit_offsets(async: true)
+        @mutex.lock
+
+        internal_commit_offsets(async: async)
+      ensure
+        @mutex.unlock
+      end
+
+      # Commits offsets in a synchronous way.
+      #
+      # @see `#commit_offsets` for more details
+      def commit_offsets!
+        commit_offsets(async: false)
+      end
+
+      # Seeks to a particular message. The next poll on the topic/partition will return the
+      # message at the given offset.
+      #
+      # @param message [Messages::Message, Messages::Seek] message to which we want to seek
+      def seek(message)
+        @mutex.lock
+
+        @kafka.seek(message)
+      ensure
+        @mutex.unlock
+      end
+
+      # Pauses a given partition and moves back to the last successfully processed offset.
+      #
+      # @param topic [String] topic name
+      # @param partition [Integer] partition
+      # @param offset [Integer] offset of the message on which we want to pause (this message
+      #   will be reprocessed after getting back to processing)
+      # @note This will pause indefinitely and requires a manual `#resume`
+      def pause(topic, partition, offset)
+        @mutex.lock
+
+        # Do not pause if the client got closed, as it would not change anything
+        return if @closed
+
+        pause_msg = Messages::Seek.new(topic, partition, offset)
+
+        internal_commit_offsets(async: true)
+
+        # Here we do not use our cached tpls because we should not try to pause something we do
+        # not own anymore.
+        tpl = topic_partition_list(topic, partition)
+
+        return unless tpl
+
+        @paused_tpls[topic][partition] = tpl
+
+        @kafka.pause(tpl)
+
+        @kafka.seek(pause_msg)
+      ensure
+        @mutex.unlock
+      end
+
+      # Resumes processing of a given topic partition after it was paused.
+      #
+      # @param topic [String] topic name
+      # @param partition [Integer] partition
+      def resume(topic, partition)
+        @mutex.lock
+
+        return if @closed
+
+        # Always commit offsets synchronously (if there are any) when we resume.
+        # This prevents resuming without an offset in case it was not committed prior.
+        # We can ignore the performance penalty since resuming should not happen too often
+        internal_commit_offsets(async: false)
+
+        # If we cannot fetch the current tpl, try to reuse the one we cached (if we have one)
+        tpl = topic_partition_list(topic, partition) || @paused_tpls[topic][partition]
+
+        return unless tpl
+        # If we did not have it, it means we never paused this partition, thus no resume should
+        # happen in the first place
+        return unless @paused_tpls[topic].delete(partition)
+
+        @kafka.resume(tpl)
+      ensure
+        @mutex.unlock
       end
 
-      # Gracefully stops topic consumption
+      # Gracefully stops topic consumption.
+      #
       # @note Stopping running consumers without a really important reason is not recommended
       #   as until all the consumers are stopped, the server will keep running serving only
       #   part of the messages
       def stop
-        @kafka_consumer&.stop
-        @kafka_consumer = nil
+        close
       end
 
-      # Pauses fetching and consumption of a given topic partition
-      # @param topic [String] topic that we want to pause
-      # @param partition [Integer] number partition that we want to pause
-      def pause(topic, partition)
-        kafka_consumer.pause(*ApiAdapter.pause(topic, partition, consumer_group))
+      # Marks a given message as consumed.
+      #
+      # @param [Karafka::Messages::Message] message that we want to mark as processed
+      # @return [Boolean] true if successful. False if we no longer own the given partition
+      # @note This method won't trigger automatic offset commits, instead relying on the offset
+      #   check-pointing trigger that happens with each processed batch
+      def mark_as_consumed(message)
+        store_offset(message)
       end
 
-      # Marks given message as consumed
-      # @param [Karafka::Params::Params] params message that we want to mark as processed
-      # @note This method won't trigger automatic offsets commits, rather relying on the ruby-kafka
-      #   offsets time-interval based committing
-      def mark_as_consumed(params)
-        kafka_consumer.mark_message_as_processed(
-          *ApiAdapter.mark_message_as_processed(params)
-        )
+      # Marks a given message as consumed and commits the offsets in a blocking way.
+      #
+      # @param [Karafka::Messages::Message] message that we want to mark as processed
+      # @return [Boolean] true if successful. False if we no longer own the given partition
+      def mark_as_consumed!(message)
+        return false unless mark_as_consumed(message)
+
+        commit_offsets!
       end
 
-      # Marks a given message as consumed and commit the offsets in a blocking way
-      # @param [Karafka::Params::Params] params message that we want to mark as processed
-      # @note This method commits the offset for each manual marking to be sure
-      #   that offset commit happen asap in case of a crash
-      def mark_as_consumed!(params)
-        mark_as_consumed(params)
-        # Trigger an immediate, blocking offset commit in order to minimize the risk of crashing
-        # before the automatic triggers have kicked in.
-        kafka_consumer.commit_offsets
+      # Closes and resets the client completely.
+      def reset
+        close
+
+        @mutex.synchronize do
+          @closed = false
+          @offsetting = false
+          @paused_tpls.clear
+          @kafka = build_consumer
+        end
       end
 
       private
 
-      attr_reader :consumer_group
-
-      # @return [Kafka::Consumer] returns a ready to consume Kafka consumer
-      #   that is set up to consume from topics of a given consumer group
-      def kafka_consumer
-        # @note We don't cache the connection internally because we cache kafka_consumer that uses
-        #   kafka client object instance
-        @kafka_consumer ||= Builder.call(consumer_group).consumer(
-          *ApiAdapter.consumer(consumer_group)
-        ).tap do |consumer|
-          consumer_group.topics.each do |topic|
-            consumer.subscribe(*ApiAdapter.subscribe(topic))
+      # When we cannot store an offset, it means we no longer own the partition
+      #
+      # Non thread-safe offset storing method
+      # @param message [Karafka::Messages::Message]
+      # @return [Boolean] true if we could store the offset (if we still own the partition)
+      def internal_store_offset(message)
+        @offsetting = true
+        @kafka.store_offset(message)
+        true
+      rescue Rdkafka::RdkafkaError => e
+        return false if e.code == :assignment_lost
+        return false if e.code == :state
+
+        raise e
+      end
+
+      # Non thread-safe offset committing method
+      # @param async [Boolean] should the commit happen async or sync (async by default)
+      # @return [Boolean] true if the offset commit worked, false if we've lost the assignment
+      def internal_commit_offsets(async: true)
+        return true unless @offsetting
+
+        @kafka.commit(nil, async)
+        @offsetting = false
+
+        true
+      rescue Rdkafka::RdkafkaError => e
+        case e.code
+        when :assignment_lost
+          return false
+        when :no_offset
+          return true
+        when :coordinator_load_in_progress
+          sleep(1)
+          retry
+        end
+
+        raise e
+      end
+
+      # Commits the stored offsets in a sync way and closes the consumer.
+      def close
+        @mutex.synchronize do
+          # Once the client is closed, we should not close it again.
+          # This could only happen in case of a race condition when a forceful shutdown happens
+          # and triggers this from a different thread
+          return if @closed
+
+          @closed = true
+
+          internal_commit_offsets(async: false)
+
+          # Remove the callback runners that were registered
+          ::Karafka::Instrumentation.statistics_callbacks.delete(@subscription_group.id)
+          ::Karafka::Instrumentation.error_callbacks.delete(@subscription_group.id)
+
+          @kafka.close
+          @buffer.clear
+          # @note We do not clear the rebalance manager here, as we may still have revocation
+          #   info that we want to consider valid prior to running another reconnection
+        end
+      end
+
+      # @param topic [String]
+      # @param partition [Integer]
+      # @return [Rdkafka::Consumer::TopicPartitionList]
+      def topic_partition_list(topic, partition)
+        rdkafka_partition = @kafka
+                            .assignment
+                            .to_h[topic]
+                            &.detect { |part| part.partition == partition }
+
+        return unless rdkafka_partition
+
+        Rdkafka::Consumer::TopicPartitionList.new({ topic => [rdkafka_partition] })
+      end
+
+      # Performs a single poll operation.
+      #
+      # @param timeout [Integer] timeout for a single poll
+      # @return [Rdkafka::Consumer::Message, nil] fetched message or nil if nothing was polled
+      def poll(timeout)
+        time_poll ||= TimeTrackers::Poll.new(timeout)
+
+        return nil if time_poll.exceeded?
+
+        time_poll.start
+
+        @kafka.poll(timeout)
+      rescue ::Rdkafka::RdkafkaError => e
+        # Most of the errors can be safely ignored as librdkafka will recover from them
+        # @see https://github.com/edenhill/librdkafka/issues/1987#issuecomment-422008750
+        # @see https://github.com/edenhill/librdkafka/wiki/Error-handling
+        if time_poll.attempts > MAX_POLL_RETRIES || !time_poll.retryable?
+          Karafka.monitor.instrument(
+            'error.occurred',
+            caller: self,
+            error: e,
+            type: 'connection.client.poll.error'
+          )
+
+          raise
+        end
+
+        time_poll.checkpoint
+        time_poll.backoff
+
+        # poll may not only return a message but can also run callbacks, and if those changed
+        # anything, despite the errors we need to delegate this info to the other app parts
+        @rebalance_manager.changed? ? nil : retry
+      end
+
+      # Builds a new rdkafka consumer instance based on the subscription group configuration
+      # @return [Rdkafka::Consumer]
+      def build_consumer
+        ::Rdkafka::Config.logger = ::Karafka::App.config.logger
+        config = ::Rdkafka::Config.new(@subscription_group.kafka)
+        config.consumer_rebalance_listener = @rebalance_manager
+        consumer = config.consumer
+        @name = consumer.name
+
+        # Register the statistics runner for this particular type of callbacks
+        ::Karafka::Instrumentation.statistics_callbacks.add(
+          @subscription_group.id,
+          Instrumentation::Callbacks::Statistics.new(
+            @subscription_group.id,
+            @subscription_group.consumer_group_id,
+            @name,
+            ::Karafka::App.config.monitor
+          )
+        )
+
+        # Register the error tracking callback
+        ::Karafka::Instrumentation.error_callbacks.add(
+          @subscription_group.id,
+          Instrumentation::Callbacks::Error.new(
+            @subscription_group.id,
+            @subscription_group.consumer_group_id,
+            @name,
+            ::Karafka::App.config.monitor
+          )
+        )
+
+        # Subscription needs to happen after we have assigned the rebalance callbacks, just in
+        # case of a race condition
+        consumer.subscribe(*@subscription_group.topics.map(&:name))
+        consumer
+      end
+
+      # We may have a case where in the middle of data polling we've lost a partition.
+      # In a case like this we should remove all the pre-buffered messages from lost partitions,
+      # as we are no longer responsible in a given process for processing those messages and
+      # they should have been picked up by a different process.
+      def remove_revoked_and_duplicated_messages
+        @rebalance_manager.lost_partitions.each do |topic, partitions|
+          partitions.each do |partition|
+            @buffer.delete(topic, partition)
           end
         end
-      rescue Kafka::ConnectionError
-        # If we would not wait it will spam log file with failed
-        # attempts if Kafka is down
-        sleep(consumer_group.reconnect_timeout)
-        # We don't log and just re-raise - this will be logged
-        # down the road
-        raise
+
+        @buffer.uniq!
       end
     end
   end
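
For orientation, a connection listener drives this client in a fetch-process-commit cycle. The following is a minimal sketch of that flow, not the actual Karafka::Connection::Listener code: subscription_group stands for a configured Karafka::Routing::SubscriptionGroup, process is a hypothetical handler, and the (topic, partition, messages) iteration shape of the buffer is assumed for illustration:

client = Karafka::Connection::Client.new(subscription_group)

loop do
  # One bounded fetch cycle (max_wait_time / max_messages come from the subscription group)
  buffer = client.batch_poll

  buffer.each do |topic, partition, messages|
    messages.each do |message|
      process(message)             # hypothetical application-level work
      client.store_offset(message) # record progress without an immediate commit
    end
  end

  # Flush the stored offsets; false here would mean the assignment was lost.
  # On a processing error we could instead pause at the failed message via
  # client.pause(topic, partition, offset) and call client.resume(topic, partition) later.
  client.commit_offsets
end

client.stop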