karafka 1.4.12 → 2.0.0.alpha1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (126) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/FUNDING.yml +3 -0
  4. data/.github/workflows/ci.yml +74 -25
  5. data/CHANGELOG.md +38 -3
  6. data/Gemfile +6 -0
  7. data/Gemfile.lock +32 -34
  8. data/LICENSE +14 -0
  9. data/LICENSE-COMM +89 -0
  10. data/LICENSE-LGPL +165 -0
  11. data/README.md +16 -48
  12. data/bin/benchmarks +85 -0
  13. data/bin/create_token +28 -0
  14. data/bin/integrations +160 -0
  15. data/bin/stress +13 -0
  16. data/certs/karafka-pro.pem +11 -0
  17. data/config/errors.yml +4 -38
  18. data/docker-compose.yml +11 -3
  19. data/karafka.gemspec +13 -13
  20. data/lib/active_job/consumer.rb +22 -0
  21. data/lib/active_job/karafka.rb +18 -0
  22. data/lib/active_job/queue_adapters/karafka_adapter.rb +29 -0
  23. data/lib/active_job/routing_extensions.rb +15 -0
  24. data/lib/karafka/app.rb +13 -20
  25. data/lib/karafka/base_consumer.rb +103 -34
  26. data/lib/karafka/cli/base.rb +4 -4
  27. data/lib/karafka/cli/info.rb +43 -8
  28. data/lib/karafka/cli/install.rb +3 -8
  29. data/lib/karafka/cli/server.rb +17 -30
  30. data/lib/karafka/cli.rb +4 -11
  31. data/lib/karafka/connection/client.rb +279 -93
  32. data/lib/karafka/connection/listener.rb +137 -38
  33. data/lib/karafka/connection/messages_buffer.rb +57 -0
  34. data/lib/karafka/connection/pauses_manager.rb +46 -0
  35. data/lib/karafka/connection/rebalance_manager.rb +62 -0
  36. data/lib/karafka/contracts/config.rb +25 -7
  37. data/lib/karafka/contracts/consumer_group.rb +0 -173
  38. data/lib/karafka/contracts/consumer_group_topic.rb +17 -7
  39. data/lib/karafka/contracts/server_cli_options.rb +1 -9
  40. data/lib/karafka/contracts.rb +1 -1
  41. data/lib/karafka/env.rb +46 -0
  42. data/lib/karafka/errors.rb +14 -18
  43. data/lib/karafka/helpers/multi_delegator.rb +2 -2
  44. data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
  45. data/lib/karafka/instrumentation/callbacks/statistics.rb +42 -0
  46. data/lib/karafka/instrumentation/monitor.rb +14 -21
  47. data/lib/karafka/instrumentation/stdout_listener.rb +64 -91
  48. data/lib/karafka/instrumentation.rb +21 -0
  49. data/lib/karafka/licenser.rb +65 -0
  50. data/lib/karafka/{params → messages}/batch_metadata.rb +7 -13
  51. data/lib/karafka/messages/builders/batch_metadata.rb +30 -0
  52. data/lib/karafka/messages/builders/message.rb +38 -0
  53. data/lib/karafka/messages/builders/messages.rb +40 -0
  54. data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
  55. data/lib/karafka/messages/messages.rb +64 -0
  56. data/lib/karafka/{params → messages}/metadata.rb +4 -6
  57. data/lib/karafka/messages/seek.rb +9 -0
  58. data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
  59. data/lib/karafka/processing/executor.rb +96 -0
  60. data/lib/karafka/processing/executors_buffer.rb +49 -0
  61. data/lib/karafka/processing/jobs/base.rb +18 -0
  62. data/lib/karafka/processing/jobs/consume.rb +28 -0
  63. data/lib/karafka/processing/jobs/revoked.rb +22 -0
  64. data/lib/karafka/processing/jobs/shutdown.rb +23 -0
  65. data/lib/karafka/processing/jobs_queue.rb +121 -0
  66. data/lib/karafka/processing/worker.rb +57 -0
  67. data/lib/karafka/processing/workers_batch.rb +22 -0
  68. data/lib/karafka/railtie.rb +65 -0
  69. data/lib/karafka/routing/builder.rb +15 -14
  70. data/lib/karafka/routing/consumer_group.rb +10 -18
  71. data/lib/karafka/routing/consumer_mapper.rb +1 -2
  72. data/lib/karafka/routing/router.rb +1 -1
  73. data/lib/karafka/routing/subscription_group.rb +53 -0
  74. data/lib/karafka/routing/subscription_groups_builder.rb +51 -0
  75. data/lib/karafka/routing/topic.rb +47 -25
  76. data/lib/karafka/runner.rb +59 -0
  77. data/lib/karafka/serialization/json/deserializer.rb +6 -15
  78. data/lib/karafka/server.rb +62 -25
  79. data/lib/karafka/setup/config.rb +86 -159
  80. data/lib/karafka/status.rb +13 -3
  81. data/lib/karafka/templates/example_consumer.rb.erb +16 -0
  82. data/lib/karafka/templates/karafka.rb.erb +14 -50
  83. data/lib/karafka/time_trackers/base.rb +19 -0
  84. data/lib/karafka/time_trackers/pause.rb +84 -0
  85. data/lib/karafka/time_trackers/poll.rb +65 -0
  86. data/lib/karafka/version.rb +1 -1
  87. data/lib/karafka.rb +30 -13
  88. data.tar.gz.sig +0 -0
  89. metadata +70 -87
  90. metadata.gz.sig +0 -0
  91. data/MIT-LICENCE +0 -18
  92. data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
  93. data/lib/karafka/attributes_map.rb +0 -63
  94. data/lib/karafka/backends/inline.rb +0 -16
  95. data/lib/karafka/base_responder.rb +0 -226
  96. data/lib/karafka/cli/flow.rb +0 -48
  97. data/lib/karafka/cli/missingno.rb +0 -19
  98. data/lib/karafka/code_reloader.rb +0 -67
  99. data/lib/karafka/connection/api_adapter.rb +0 -158
  100. data/lib/karafka/connection/batch_delegator.rb +0 -55
  101. data/lib/karafka/connection/builder.rb +0 -23
  102. data/lib/karafka/connection/message_delegator.rb +0 -36
  103. data/lib/karafka/consumers/batch_metadata.rb +0 -10
  104. data/lib/karafka/consumers/callbacks.rb +0 -71
  105. data/lib/karafka/consumers/includer.rb +0 -64
  106. data/lib/karafka/consumers/responders.rb +0 -24
  107. data/lib/karafka/consumers/single_params.rb +0 -15
  108. data/lib/karafka/contracts/responder_usage.rb +0 -54
  109. data/lib/karafka/fetcher.rb +0 -42
  110. data/lib/karafka/helpers/class_matcher.rb +0 -88
  111. data/lib/karafka/helpers/config_retriever.rb +0 -46
  112. data/lib/karafka/helpers/inflector.rb +0 -26
  113. data/lib/karafka/params/builders/batch_metadata.rb +0 -30
  114. data/lib/karafka/params/builders/params.rb +0 -38
  115. data/lib/karafka/params/builders/params_batch.rb +0 -25
  116. data/lib/karafka/params/params_batch.rb +0 -60
  117. data/lib/karafka/patches/ruby_kafka.rb +0 -47
  118. data/lib/karafka/persistence/client.rb +0 -29
  119. data/lib/karafka/persistence/consumers.rb +0 -45
  120. data/lib/karafka/persistence/topics.rb +0 -48
  121. data/lib/karafka/responders/builder.rb +0 -36
  122. data/lib/karafka/responders/topic.rb +0 -55
  123. data/lib/karafka/routing/topic_mapper.rb +0 -53
  124. data/lib/karafka/serialization/json/serializer.rb +0 -31
  125. data/lib/karafka/setup/configurators/water_drop.rb +0 -36
  126. data/lib/karafka/templates/application_responder.rb.erb +0 -11
@@ -1,119 +1,305 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Karafka
4
+ # Namespace for Kafka connection related logic
4
5
  module Connection
5
- # Class used as a wrapper around Ruby-Kafka client to simplify additional
6
- # features that we provide/might provide in future and to hide the internal implementation
6
+ # An abstraction layer on top of the rdkafka consumer.
7
+ #
8
+ # It is threadsafe and provides some security measures so we won't end up operating on a
9
+ # closed consumer instance as it causes Ruby VM process to crash.
7
10
  class Client
8
- extend Forwardable
9
-
10
- %i[
11
- seek
12
- trigger_heartbeat
13
- trigger_heartbeat!
14
- ].each do |delegated_method|
15
- def_delegator :kafka_consumer, delegated_method
16
- end
17
-
18
- # Creates a queue consumer client that will pull the data from Kafka
19
- # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group for which
20
- # we create a client
21
- # @return [Karafka::Connection::Client] group consumer that can subscribe to
22
- # multiple topics
23
- def initialize(consumer_group)
24
- @consumer_group = consumer_group
25
- Persistence::Client.write(self)
26
- end
27
-
28
- # Opens connection, gets messages and calls a block for each of the incoming messages
29
- # @yieldparam [Array<Kafka::FetchedMessage>, Symbol] kafka response with an info about
30
- # the type of the fetcher that is being used
31
- # @note This will yield with raw messages - no preprocessing or reformatting.
32
- def fetch_loop
33
- settings = ApiAdapter.consumption(consumer_group)
34
-
35
- if consumer_group.batch_fetching
36
- kafka_consumer.each_batch(**settings) { |batch| yield(batch, :batch) }
37
- else
38
- kafka_consumer.each_message(**settings) { |message| yield(message, :message) }
11
+ attr_reader :rebalance_manager
12
+
13
+ # @return [String] underlying consumer name
14
+ # @note Consumer name may change in case we regenerate it
15
+ attr_reader :name
16
+
17
+ # How many times should we retry polling in case of a failure
18
+ MAX_POLL_RETRIES = 10
19
+
20
+ private_constant :MAX_POLL_RETRIES
21
+
22
+ # Creates a new consumer instance.
23
+ #
24
+ # @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group
25
+ # with all the configuration details needed for us to create a client
26
+ # @return [Karafka::Connection::Client]
27
+ def initialize(subscription_group)
28
+ # Name is set when we build consumer
29
+ @name = ''
30
+ @mutex = Mutex.new
31
+ @closed = false
32
+ @subscription_group = subscription_group
33
+ @buffer = MessagesBuffer.new
34
+ @rebalance_manager = RebalanceManager.new
35
+ @kafka = build_consumer
36
+ # Marks if we need to offset. If we did not store offsets, we should not commit the offset
37
+ # position as it will crash rdkafka
38
+ @offsetting = false
39
+ end
40
+
41
+ # Fetches messages within boundaries defined by the settings (time, size, topics, etc).
42
+ #
43
+ # @return [Karafka::Connection::MessagesBuffer] messages buffer that holds messages per topic
44
+ # partition
45
+ # @note This method should not be executed from many threads at the same time
46
+ def batch_poll
47
+ time_poll = TimeTrackers::Poll.new(@subscription_group.max_wait_time)
48
+ time_poll.start
49
+
50
+ @buffer.clear
51
+
52
+ loop do
53
+ # Don't fetch more messages if we do not have any time left
54
+ break if time_poll.exceeded?
55
+ # Don't fetch more messages if we've fetched max as we've wanted
56
+ break if @buffer.size >= @subscription_group.max_messages
57
+
58
+ # Fetch message within our time boundaries
59
+ message = poll(time_poll.remaining)
60
+
61
+ # If there are no more messages, return what we have
62
+ break unless message
63
+
64
+ @buffer << message
65
+
66
+ # Track time spent on all of the processing and polling
67
+ time_poll.checkpoint
39
68
  end
40
- # @note We catch only the processing errors as any other are considered critical (exceptions)
41
- # and should require a client restart with a backoff
42
- rescue Kafka::ProcessingError => e
43
- # If there was an error during consumption, we have to log it, pause current partition
44
- # and process other things
45
- Karafka.monitor.instrument(
46
- 'connection.client.fetch_loop.error',
47
- caller: self,
48
- error: e.cause
49
- )
50
- pause(e.topic, e.partition)
51
- retry
69
+
70
+ @buffer
71
+ end
72
+
73
+ # Stores offset for a given partition of a given topic based on the provided message.
74
+ #
75
+ # @param message [Karafka::Messages::Message]
76
+ def store_offset(message)
77
+ @mutex.synchronize do
78
+ @offsetting = true
79
+ @kafka.store_offset(message)
80
+ end
81
+ end
82
+
83
+ # Commits the offset on a current consumer in a non-blocking or blocking way.
84
+ # Ignoring a case where there would not be an offset (for example when rebalance occurs).
85
+ #
86
+ # @param async [Boolean] should the commit happen async or sync (async by default)
87
+ # @note This will commit all the offsets for the whole consumer. In order to achieve
88
+ # granular control over where the offset should be for particular topic partitions, the
89
+ # store_offset should be used to only store new offset when we want it to be flushed
90
+ def commit_offsets(async: true)
91
+ @mutex.lock
92
+
93
+ return unless @offsetting
94
+
95
+ @kafka.commit(nil, async)
96
+ @offsetting = false
97
+ rescue Rdkafka::RdkafkaError => e
98
+ return if e.code == :no_offset
99
+
100
+ raise e
101
+ ensure
102
+ @mutex.unlock
103
+ end
104
+
105
+ # Commits offset in a synchronous way.
106
+ #
107
+ # @see `#commit_offsets` for more details
108
+ def commit_offsets!
109
+ commit_offsets(async: false)
110
+ end
111
+
112
+ # Seek to a particular message. The next poll on the topic/partition will return the
113
+ # message at the given offset.
114
+ #
115
+ # @param message [Messages::Message, Messages::Seek] message to which we want to seek to
116
+ def seek(message)
117
+ @kafka.seek(message)
118
+ end
119
+
120
+ # Pauses given partition and moves back to last successful offset processed.
121
+ #
122
+ # @param topic [String] topic name
123
+ # @param partition [Integer] partition
124
+ # @param offset [Integer] offset of the message on which we want to pause (this message will
125
+ # be reprocessed after getting back to processing)
126
+ # @note This will pause indefinitely and requires manual `#resume`
127
+ def pause(topic, partition, offset)
128
+ @mutex.lock
129
+
130
+ # Do not pause if the client got closed, would not change anything
131
+ return if @closed
132
+
133
+ tpl = topic_partition_list(topic, partition)
134
+
135
+ return unless tpl
136
+
137
+ @kafka.pause(tpl)
138
+
139
+ pause_msg = Messages::Seek.new(topic, partition, offset)
140
+
141
+ seek(pause_msg)
142
+ ensure
143
+ @mutex.unlock
52
144
  end
53
145
 
54
- # Gracefully stops topic consumption
146
+ # Resumes processing of a given topic partition after it was paused.
147
+ #
148
+ # @param topic [String] topic name
149
+ # @param partition [Integer] partition
150
+ def resume(topic, partition)
151
+ @mutex.lock
152
+
153
+ return if @closed
154
+
155
+ tpl = topic_partition_list(topic, partition)
156
+
157
+ return unless tpl
158
+
159
+ @kafka.resume(tpl)
160
+ ensure
161
+ @mutex.unlock
162
+ end
163
+
164
+ # Gracefully stops topic consumption.
165
+ #
55
166
  # @note Stopping running consumers without a really important reason is not recommended
56
167
  # as until all the consumers are stopped, the server will keep running serving only
57
168
  # part of the messages
58
169
  def stop
59
- @kafka_consumer&.stop
60
- @kafka_consumer = nil
170
+ close
61
171
  end
62
172
 
63
- # Pauses fetching and consumption of a given topic partition
64
- # @param topic [String] topic that we want to pause
65
- # @param partition [Integer] number partition that we want to pause
66
- def pause(topic, partition)
67
- args, kwargs = ApiAdapter.pause(topic, partition, consumer_group).values_at(:args, :kwargs)
68
- kafka_consumer.pause(*args, **kwargs)
173
+ # Marks given message as consumed.
174
+ #
175
+ # @param [Karafka::Messages::Message] message that we want to mark as processed
176
+ # @note This method won't trigger automatic offsets commits, rather relying on the offset
177
+ # check-pointing trigger that happens with each batch processed
178
+ def mark_as_consumed(message)
179
+ store_offset(message)
69
180
  end
70
181
 
71
- # Marks given message as consumed
72
- # @param [Karafka::Params::Params] params message that we want to mark as processed
73
- # @note This method won't trigger automatic offsets commits, rather relying on the ruby-kafka
74
- # offsets time-interval based committing
75
- def mark_as_consumed(params)
76
- kafka_consumer.mark_message_as_processed(
77
- *ApiAdapter.mark_message_as_processed(params)
78
- )
182
+ # Marks a given message as consumed and commits the offsets in a blocking way.
183
+ #
184
+ # @param [Karafka::Messages::Message] message that we want to mark as processed
185
+ def mark_as_consumed!(message)
186
+ mark_as_consumed(message)
187
+ commit_offsets!
79
188
  end
80
189
 
81
- # Marks a given message as consumed and commit the offsets in a blocking way
82
- # @param [Karafka::Params::Params] params message that we want to mark as processed
83
- # @note This method commits the offset for each manual marking to be sure
84
- # that offset commit happen asap in case of a crash
85
- def mark_as_consumed!(params)
86
- mark_as_consumed(params)
87
- # Trigger an immediate, blocking offset commit in order to minimize the risk of crashing
88
- # before the automatic triggers have kicked in.
89
- kafka_consumer.commit_offsets
190
+ # Closes and resets the client completely.
191
+ def reset
192
+ close
193
+
194
+ @mutex.synchronize do
195
+ @closed = false
196
+ @offsetting = false
197
+ @kafka = build_consumer
198
+ end
90
199
  end
91
200
 
92
201
  private
93
202
 
94
- attr_reader :consumer_group
95
-
96
- # @return [Kafka::Consumer] returns a ready to consume Kafka consumer
97
- # that is set up to consume from topics of a given consumer group
98
- def kafka_consumer
99
- # @note We don't cache the connection internally because we cache kafka_consumer that uses
100
- # kafka client object instance
101
- @kafka_consumer ||= Builder.call(consumer_group).consumer(
102
- **ApiAdapter.consumer(consumer_group)
103
- ).tap do |consumer|
104
- consumer_group.topics.each do |topic|
105
- settings = ApiAdapter.subscribe(topic)
106
-
107
- consumer.subscribe(settings[0], **settings[1])
108
- end
203
+ # Commits the stored offsets in a sync way and closes the consumer.
204
+ def close
205
+ commit_offsets!
206
+
207
+ @mutex.synchronize do
208
+ @closed = true
209
+
210
+ # Remove callbacks runners that were registered
211
+ ::Karafka::Instrumentation.statistics_callbacks.delete(@subscription_group.id)
212
+ ::Karafka::Instrumentation.error_callbacks.delete(@subscription_group.id)
213
+
214
+ @kafka.close
109
215
  end
110
- rescue Kafka::ConnectionError
111
- # If we would not wait it will spam log file with failed
112
- # attempts if Kafka is down
113
- sleep(consumer_group.reconnect_timeout)
114
- # We don't log and just re-raise - this will be logged
115
- # down the road
116
- raise
216
+ end
217
+
218
+ # @param topic [String]
219
+ # @param partition [Integer]
220
+ # @return [Rdkafka::Consumer::TopicPartitionList]
221
+ def topic_partition_list(topic, partition)
222
+ rdkafka_partition = @kafka
223
+ .assignment
224
+ .to_h[topic]
225
+ &.detect { |part| part.partition == partition }
226
+
227
+ return unless rdkafka_partition
228
+
229
+ Rdkafka::Consumer::TopicPartitionList.new({ topic => [rdkafka_partition] })
230
+ end
231
+
232
+ # Performs a single poll operation.
233
+ #
234
+ # @param timeout [Integer] timeout for a single poll
235
+ # @return [Rdkafka::Consumer::Message, nil] fetched message or nil if nothing polled
236
+ def poll(timeout)
237
+ time_poll ||= TimeTrackers::Poll.new(timeout)
238
+
239
+ return nil if time_poll.exceeded?
240
+
241
+ time_poll.start
242
+
243
+ @kafka.poll(time_poll.remaining)
244
+ rescue ::Rdkafka::RdkafkaError => e
245
+ raise if time_poll.attempts > MAX_POLL_RETRIES
246
+ raise unless time_poll.retryable?
247
+
248
+ case e.code
249
+ when :max_poll_exceeded # -147
250
+ reset
251
+ when :transport # -195
252
+ reset
253
+ when :rebalance_in_progress # -27
254
+ reset
255
+ when :not_coordinator # 16
256
+ reset
257
+ when :network_exception # 13
258
+ reset
259
+ end
260
+
261
+ time_poll.checkpoint
262
+
263
+ raise unless time_poll.retryable?
264
+
265
+ time_poll.backoff
266
+
267
+ retry
268
+ end
269
+
270
+ # Builds a new rdkafka consumer instance based on the subscription group configuration
271
+ # @return [Rdkafka::Consumer]
272
+ def build_consumer
273
+ ::Rdkafka::Config.logger = ::Karafka::App.config.logger
274
+ config = ::Rdkafka::Config.new(@subscription_group.kafka)
275
+ config.consumer_rebalance_listener = @rebalance_manager
276
+ consumer = config.consumer
277
+ consumer.subscribe(*@subscription_group.topics.map(&:name))
278
+ @name = consumer.name
279
+
280
+ # Register statistics runner for this particular type of callbacks
281
+ ::Karafka::Instrumentation.statistics_callbacks.add(
282
+ @subscription_group.id,
283
+ Instrumentation::Callbacks::Statistics.new(
284
+ @subscription_group.id,
285
+ @subscription_group.consumer_group_id,
286
+ @name,
287
+ ::Karafka::App.config.monitor
288
+ )
289
+ )
290
+
291
+ # Register error tracking callback
292
+ ::Karafka::Instrumentation.error_callbacks.add(
293
+ @subscription_group.id,
294
+ Instrumentation::Callbacks::Error.new(
295
+ @subscription_group.id,
296
+ @subscription_group.consumer_group_id,
297
+ @name,
298
+ ::Karafka::App.config.monitor
299
+ )
300
+ )
301
+
302
+ consumer
117
303
  end
118
304
  end
119
305
  end
@@ -2,69 +2,168 @@
2
2
 
3
3
  module Karafka
4
4
  module Connection
5
- # A single listener that listens to incoming messages from a single route
6
- # @note It does not loop on itself - it needs to be executed in a loop
7
- # @note Listener itself does nothing with the message - it will return to the block
8
- # a raw Kafka::FetchedMessage
5
+ # A single listener that listens to incoming messages from a single subscription group.
6
+ # It polls the messages and then enqueues. It also takes care of potential recovery from
7
+ # critical errors by restarting everything in a safe manner.
9
8
  class Listener
10
- # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group that holds details
11
- # on what topics and with what settings should we listen
9
+ # @param subscription_group [Karafka::Routing::SubscriptionGroup]
10
+ # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
12
11
  # @return [Karafka::Connection::Listener] listener instance
13
- def initialize(consumer_group)
14
- @consumer_group = consumer_group
12
+ def initialize(subscription_group, jobs_queue)
13
+ @subscription_group = subscription_group
14
+ @jobs_queue = jobs_queue
15
+ @pauses_manager = PausesManager.new
16
+ @client = Client.new(@subscription_group)
17
+ @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
15
18
  end
16
19
 
17
- # Runs prefetch callbacks and executes the main listener fetch loop
20
+ # Runs the main listener fetch loop.
21
+ #
22
+ # @note Prefetch callbacks can be used to seek offset or do other things before we actually
23
+ # start consuming data
18
24
  def call
19
25
  Karafka.monitor.instrument(
20
26
  'connection.listener.before_fetch_loop',
21
- consumer_group: @consumer_group,
22
- client: client
27
+ caller: self,
28
+ subscription_group: @subscription_group,
29
+ client: @client
23
30
  )
31
+
24
32
  fetch_loop
25
33
  end
26
34
 
27
35
  private
28
36
 
29
- # Opens connection, gets messages and calls a block for each of the incoming messages
37
+ # Fetches the data and adds it to the jobs queue.
38
+ #
30
39
  # @note We catch all the errors here, so they don't affect other listeners (or this one)
31
40
  # so we will be able to listen and consume other incoming messages.
32
- # Since it is run inside Karafka::Connection::ActorCluster - catching all the exceptions
33
- # won't crash the whole cluster. Here we mostly focus on catching the exceptions related to
41
+ # Since it is run inside Karafka::Runner thread - catching all the exceptions
42
+ # won't crash the whole process. Here we mostly focus on catching the exceptions related to
34
43
  # Kafka connections / Internet connection issues / Etc. Business logic problems should not
35
- # propagate this far
44
+ # propagate this far.
36
45
  def fetch_loop
37
- # @note What happens here is a delegation of processing to a proper processor based
38
- # on the incoming messages characteristics
39
- client.fetch_loop do |raw_data, type|
40
- Karafka.monitor.instrument('connection.listener.fetch_loop')
41
-
42
- case type
43
- when :message
44
- MessageDelegator.call(@consumer_group.id, raw_data)
45
- when :batch
46
- BatchDelegator.call(@consumer_group.id, raw_data)
47
- end
46
+ until Karafka::App.stopping?
47
+ Karafka.monitor.instrument(
48
+ 'connection.listener.fetch_loop',
49
+ caller: self,
50
+ client: @client
51
+ )
52
+
53
+ resume_paused_partitions
54
+ # We need to fetch data before we revoke lost partitions details as during the polling
55
+ # the callbacks for tracking lost partitions are triggered. Otherwise we would be always
56
+ # one batch behind.
57
+ messages_buffer = @client.batch_poll
58
+
59
+ Karafka.monitor.instrument(
60
+ 'connection.listener.fetch_loop.received',
61
+ caller: self,
62
+ messages_buffer: messages_buffer
63
+ )
64
+
65
+ # If there were revoked partitions, we need to wait on their jobs to finish before
66
+ # distributing consuming jobs as upon revoking, we might get assigned to the same
67
+ # partitions, thus getting their jobs. The revoking jobs need to finish before
68
+ # appropriate consumers are taken down and re-created
69
+ wait(@subscription_group) if distribute_revoke_lost_partitions_jobs
70
+
71
+ distribute_partitions_jobs(messages_buffer)
72
+
73
+ # We wait only on jobs from our subscription group. Other groups are independent.
74
+ wait(@subscription_group)
75
+
76
+ # We don't use the `#commit_offsets!` here for performance reasons. This can be achieved
77
+ # if needed by using manual offset management.
78
+ @client.commit_offsets
48
79
  end
80
+
81
+ shutdown
82
+
49
83
  # This is on purpose - see the notes for this method
50
84
  # rubocop:disable Lint/RescueException
51
85
  rescue Exception => e
52
- Karafka.monitor.instrument('connection.listener.fetch_loop.error', caller: self, error: e)
53
86
  # rubocop:enable Lint/RescueException
54
- # We can stop client without a problem, as it will reinitialize itself when running the
55
- # `fetch_loop` again
87
+ Karafka.monitor.instrument(
88
+ 'error.occurred',
89
+ caller: self,
90
+ error: e,
91
+ type: 'connection.listener.fetch_loop.error'
92
+ )
93
+
94
+ restart
95
+
96
+ sleep(1) && retry
97
+ end
98
+
99
+ # Resumes processing of partitions that were paused due to an error.
100
+ def resume_paused_partitions
101
+ @pauses_manager.resume { |topic, partition| @client.resume(topic, partition) }
102
+ end
103
+
104
+ # Enqueues revoking jobs for partitions that were taken away from the running process.
105
+ # @return [Boolean] was there anything to revoke
106
+ def distribute_revoke_lost_partitions_jobs
107
+ revoked_partitions = @client.rebalance_manager.revoked_partitions
108
+
109
+ return false if revoked_partitions.empty?
110
+
111
+ revoked_partitions.each do |topic, partitions|
112
+ partitions.each do |partition|
113
+ pause = @pauses_manager.fetch(topic, partition)
114
+ executor = @executors.fetch(topic, partition, pause)
115
+ @jobs_queue << Processing::Jobs::Revoked.new(executor)
116
+ end
117
+ end
118
+
119
+ true
120
+ end
121
+
122
+ # Takes the messages per topic partition and enqueues processing jobs in threads.
123
+ #
124
+ # @param messages_buffer [Karafka::Connection::MessagesBuffer] buffer with messages
125
+ def distribute_partitions_jobs(messages_buffer)
126
+ messages_buffer.each do |topic, partition, messages|
127
+ pause = @pauses_manager.fetch(topic, partition)
128
+
129
+ next if pause.paused?
130
+
131
+ executor = @executors.fetch(topic, partition, pause)
132
+
133
+ @jobs_queue << Processing::Jobs::Consume.new(executor, messages)
134
+ end
135
+ end
136
+
137
+ # Waits for all the jobs from a given subscription group to finish before moving forward
138
+ # @param subscription_group [Karafka::Routing::SubscriptionGroup]
139
+ def wait(subscription_group)
140
+ @jobs_queue.wait(subscription_group.id)
141
+ end
142
+
143
+ # Stops the jobs queue, triggers shutdown on all the executors (sync), commits offsets and
144
+ # stops kafka client.
145
+ def shutdown
146
+ @jobs_queue.close
147
+ # This runs synchronously, making sure we finish all the shutdowns before we stop the
148
+ # client.
149
+ @executors.shutdown
150
+ @client.commit_offsets!
56
151
  @client.stop
57
- # We need to clear the consumers cache for current connection when fatal error happens and
58
- # we reset the connection. Otherwise for consumers with manual offset management, the
59
- # persistence might have stored some data that would be reprocessed
60
- Karafka::Persistence::Consumers.clear
61
- sleep(@consumer_group.reconnect_timeout) && retry
62
152
  end
63
153
 
64
- # @return [Karafka::Connection::Client] wrapped kafka consuming client for a given topic
65
- # consumption
66
- def client
67
- @client ||= Client.new(@consumer_group)
154
+ # We can stop client without a problem, as it will reinitialize itself when running the
155
+ # `#fetch_loop` again. We just need to remember to also reset the runner as it is a long
156
+ # running one, so with a new connection to Kafka, we need to initialize the state of the
157
+ # runner and underlying consumers once again.
158
+ def restart
159
+ # If there was any problem with processing, before we reset things we need to make sure,
160
+ # there are no jobs in the queue. Otherwise it could lead to leakage in between client
161
+ # resetting.
162
+ @jobs_queue.wait(@subscription_group.id)
163
+ @jobs_queue.clear(@subscription_group.id)
164
+ @client.reset
165
+ @pauses_manager = PausesManager.new
166
+ @executors = Processing::ExecutorsBuffer.new(@client, @subscription_group)
68
167
  end
69
168
  end
70
169
  end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Connection
5
+ # Buffer for messages.
6
+ # When message is added to this buffer, it gets assigned to an array with other messages from
7
+ # the same topic and partition.
8
+ #
9
+ # @note This buffer is NOT threadsafe.
10
+ class MessagesBuffer
11
+ attr_reader :size
12
+
13
+ # @return [Karafka::Connection::MessagesBuffer] buffer instance
14
+ def initialize
15
+ @size = 0
16
+ @groups = Hash.new do |topic_groups, topic|
17
+ topic_groups[topic] = Hash.new do |partition_groups, partition|
18
+ partition_groups[partition] = []
19
+ end
20
+ end
21
+ end
22
+
23
+ # Iterates over aggregated data providing messages per topic partition.
24
+ #
25
+ # @yieldparam [String] topic name
26
+ # @yieldparam [Integer] partition number
27
+ # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
28
+ def each
29
+ @groups.each do |topic, partitions|
30
+ partitions.each do |partition, messages|
31
+ yield(topic, partition, messages)
32
+ end
33
+ end
34
+ end
35
+
36
+ # Adds a message to the buffer.
37
+ #
38
+ # @param message [Rdkafka::Consumer::Message] raw rdkafka message
39
+ # @return [Array<Rdkafka::Consumer::Message>] given partition topic sub-buffer array
40
+ def <<(message)
41
+ @size += 1
42
+ @groups[message.topic][message.partition] << message
43
+ end
44
+
45
+ # Removes all the data from the buffer.
46
+ #
47
+ # @note We do not clear the whole groups hash but rather we clear the partition hashes, so
48
+ # we save ourselves some objects allocations. We cannot clear the underlying arrays as they
49
+ # may be used in other threads for data processing, thus if we would clear it, we could
50
+ # potentially clear a raw messages array for a job that is in the jobs queue.
51
+ def clear
52
+ @size = 0
53
+ @groups.each_value(&:clear)
54
+ end
55
+ end
56
+ end
57
+ end