ruby-kafka-aws-iam 1.4.1

Files changed (145)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +393 -0
  3. data/.github/workflows/stale.yml +19 -0
  4. data/.gitignore +13 -0
  5. data/.readygo +1 -0
  6. data/.rspec +3 -0
  7. data/.rubocop.yml +44 -0
  8. data/.ruby-version +1 -0
  9. data/.yardopts +3 -0
  10. data/CHANGELOG.md +314 -0
  11. data/Gemfile +5 -0
  12. data/ISSUE_TEMPLATE.md +23 -0
  13. data/LICENSE.txt +176 -0
  14. data/Procfile +2 -0
  15. data/README.md +1356 -0
  16. data/Rakefile +8 -0
  17. data/benchmarks/message_encoding.rb +23 -0
  18. data/bin/console +8 -0
  19. data/bin/setup +5 -0
  20. data/docker-compose.yml +39 -0
  21. data/examples/consumer-group.rb +35 -0
  22. data/examples/firehose-consumer.rb +64 -0
  23. data/examples/firehose-producer.rb +54 -0
  24. data/examples/simple-consumer.rb +34 -0
  25. data/examples/simple-producer.rb +42 -0
  26. data/examples/ssl-producer.rb +44 -0
  27. data/lib/kafka/async_producer.rb +297 -0
  28. data/lib/kafka/broker.rb +217 -0
  29. data/lib/kafka/broker_info.rb +16 -0
  30. data/lib/kafka/broker_pool.rb +41 -0
  31. data/lib/kafka/broker_uri.rb +43 -0
  32. data/lib/kafka/client.rb +838 -0
  33. data/lib/kafka/cluster.rb +513 -0
  34. data/lib/kafka/compression.rb +45 -0
  35. data/lib/kafka/compressor.rb +86 -0
  36. data/lib/kafka/connection.rb +228 -0
  37. data/lib/kafka/connection_builder.rb +33 -0
  38. data/lib/kafka/consumer.rb +642 -0
  39. data/lib/kafka/consumer_group/assignor.rb +63 -0
  40. data/lib/kafka/consumer_group.rb +231 -0
  41. data/lib/kafka/crc32_hash.rb +15 -0
  42. data/lib/kafka/datadog.rb +420 -0
  43. data/lib/kafka/digest.rb +22 -0
  44. data/lib/kafka/fetch_operation.rb +115 -0
  45. data/lib/kafka/fetched_batch.rb +58 -0
  46. data/lib/kafka/fetched_batch_generator.rb +120 -0
  47. data/lib/kafka/fetched_message.rb +48 -0
  48. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  49. data/lib/kafka/fetcher.rb +224 -0
  50. data/lib/kafka/gzip_codec.rb +34 -0
  51. data/lib/kafka/heartbeat.rb +25 -0
  52. data/lib/kafka/instrumenter.rb +38 -0
  53. data/lib/kafka/interceptors.rb +33 -0
  54. data/lib/kafka/lz4_codec.rb +27 -0
  55. data/lib/kafka/message_buffer.rb +87 -0
  56. data/lib/kafka/murmur2_hash.rb +17 -0
  57. data/lib/kafka/offset_manager.rb +259 -0
  58. data/lib/kafka/partitioner.rb +40 -0
  59. data/lib/kafka/pause.rb +92 -0
  60. data/lib/kafka/pending_message.rb +29 -0
  61. data/lib/kafka/pending_message_queue.rb +41 -0
  62. data/lib/kafka/produce_operation.rb +205 -0
  63. data/lib/kafka/producer.rb +528 -0
  64. data/lib/kafka/prometheus.rb +316 -0
  65. data/lib/kafka/protocol/add_offsets_to_txn_request.rb +29 -0
  66. data/lib/kafka/protocol/add_offsets_to_txn_response.rb +21 -0
  67. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  68. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  69. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  70. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  71. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  72. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  73. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  74. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  75. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  76. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  77. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  78. data/lib/kafka/protocol/decoder.rb +175 -0
  79. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  80. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  81. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  82. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  83. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  84. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  85. data/lib/kafka/protocol/encoder.rb +184 -0
  86. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  87. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  88. data/lib/kafka/protocol/fetch_request.rb +70 -0
  89. data/lib/kafka/protocol/fetch_response.rb +136 -0
  90. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  91. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  92. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  93. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  94. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  95. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  96. data/lib/kafka/protocol/join_group_request.rb +47 -0
  97. data/lib/kafka/protocol/join_group_response.rb +41 -0
  98. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  99. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  100. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  101. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  102. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  103. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  104. data/lib/kafka/protocol/member_assignment.rb +42 -0
  105. data/lib/kafka/protocol/message.rb +172 -0
  106. data/lib/kafka/protocol/message_set.rb +55 -0
  107. data/lib/kafka/protocol/metadata_request.rb +31 -0
  108. data/lib/kafka/protocol/metadata_response.rb +185 -0
  109. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  110. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  111. data/lib/kafka/protocol/offset_fetch_request.rb +38 -0
  112. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  113. data/lib/kafka/protocol/produce_request.rb +94 -0
  114. data/lib/kafka/protocol/produce_response.rb +63 -0
  115. data/lib/kafka/protocol/record.rb +88 -0
  116. data/lib/kafka/protocol/record_batch.rb +223 -0
  117. data/lib/kafka/protocol/request_message.rb +26 -0
  118. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  119. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  120. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  121. data/lib/kafka/protocol/sync_group_response.rb +26 -0
  122. data/lib/kafka/protocol/txn_offset_commit_request.rb +46 -0
  123. data/lib/kafka/protocol/txn_offset_commit_response.rb +47 -0
  124. data/lib/kafka/protocol.rb +225 -0
  125. data/lib/kafka/round_robin_assignment_strategy.rb +52 -0
  126. data/lib/kafka/sasl/awsmskiam.rb +128 -0
  127. data/lib/kafka/sasl/gssapi.rb +76 -0
  128. data/lib/kafka/sasl/oauth.rb +64 -0
  129. data/lib/kafka/sasl/plain.rb +39 -0
  130. data/lib/kafka/sasl/scram.rb +180 -0
  131. data/lib/kafka/sasl_authenticator.rb +73 -0
  132. data/lib/kafka/snappy_codec.rb +29 -0
  133. data/lib/kafka/socket_with_timeout.rb +96 -0
  134. data/lib/kafka/ssl_context.rb +66 -0
  135. data/lib/kafka/ssl_socket_with_timeout.rb +192 -0
  136. data/lib/kafka/statsd.rb +296 -0
  137. data/lib/kafka/tagged_logger.rb +77 -0
  138. data/lib/kafka/transaction_manager.rb +306 -0
  139. data/lib/kafka/transaction_state_machine.rb +72 -0
  140. data/lib/kafka/version.rb +5 -0
  141. data/lib/kafka/zstd_codec.rb +27 -0
  142. data/lib/kafka.rb +373 -0
  143. data/lib/ruby-kafka.rb +5 -0
  144. data/ruby-kafka.gemspec +54 -0
  145. metadata +520 -0
data/lib/kafka/producer.rb
@@ -0,0 +1,528 @@
+ # frozen_string_literal: true
+
+ require "set"
+ require "kafka/partitioner"
+ require "kafka/message_buffer"
+ require "kafka/produce_operation"
+ require "kafka/pending_message_queue"
+ require "kafka/pending_message"
+ require "kafka/compressor"
+ require "kafka/interceptors"
+
+ module Kafka
+   # Allows sending messages to a Kafka cluster.
+   #
+   # Typically you won't instantiate this class yourself, but rather have {Kafka::Client}
+   # do it for you, e.g.
+   #
+   #     # Will instantiate Kafka::Client
+   #     kafka = Kafka.new(["kafka1:9092", "kafka2:9092"])
+   #
+   #     # Will instantiate Kafka::Producer
+   #     producer = kafka.producer
+   #
+   # This is done in order to share a logger as well as a pool of broker connections across
+   # different producers. This also means that you don't need to pass the `cluster` and
+   # `logger` options to `#producer`. See {#initialize} for the list of other options
+   # you can pass in.
+   #
+   # ## Buffering
+   #
+   # The producer buffers pending messages until {#deliver_messages} is called. Note that there is
+   # a maximum buffer size (default is 1,000 messages) and writing messages after the
+   # buffer has reached this size will result in a BufferOverflow exception. Make sure
+   # to periodically call {#deliver_messages} or set `max_buffer_size` to an appropriate value.
+   #
+   # Buffering messages and sending them in batches greatly improves performance, so
+   # try to avoid sending messages after every write. The tradeoff between throughput and
+   # message delays depends on your use case.
+   #
+   # ## Error Handling and Retries
+   #
+   # The design of the error handling is based on having a {MessageBuffer} hold messages
+   # for all topics/partitions. Whenever we want to send messages to the cluster, we
+   # group the buffered messages by the broker they need to be sent to and fire off a
+   # request to each broker. A request can be a partial success, so we go through the
+   # response and inspect the error code for each partition that we wrote to. If the
+   # write to a given partition was successful, we clear the corresponding messages
+   # from the buffer -- otherwise, we log the error and keep the messages in the buffer.
+   #
+   # After this, we check if the buffer is empty. If it is, we're all done. If it's
+   # not, we do another round of requests, this time with just the remaining messages.
+   # We do this for as long as `max_retries` permits.
+   #
+   # ## Compression
+   #
+   # Depending on what kind of data you produce, enabling compression may yield improved
+   # bandwidth and space usage. Compression in Kafka is done on entire message sets
+   # rather than on individual messages. This improves the compression rate and generally
+   # means that compression works better the larger your buffers get, since the message
+   # sets will be larger by the time they're compressed.
+   #
+   # Since many workloads have variations in throughput and distribution across partitions,
+   # it's possible to configure a threshold for when to enable compression by setting
+   # `compression_threshold`. Only if the defined number of messages are buffered for a
+   # partition will the messages be compressed.
+   #
+   # Compression is enabled by passing the `compression_codec` parameter with the
+   # name of one of the algorithms allowed by Kafka:
+   #
+   # * `:snappy` for [Snappy](http://google.github.io/snappy/) compression.
+   # * `:gzip` for [gzip](https://en.wikipedia.org/wiki/Gzip) compression.
+   # * `:lz4` for [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) compression.
+   # * `:zstd` for [zstd](https://facebook.github.io/zstd/) compression.
+   #
+   # By default, all message sets will be compressed if you specify a compression
+   # codec. To increase the compression threshold, set `compression_threshold` to
+   # an integer value higher than one.
+   #
+   # ## Instrumentation
+   #
+   # Whenever {#produce} is called, the notification `produce_message.producer.kafka`
+   # will be emitted with the following payload:
+   #
+   # * `value` – the message value.
+   # * `key` – the message key.
+   # * `topic` – the topic that was produced to.
+   # * `buffer_size` – the buffer size after adding the message.
+   # * `max_buffer_size` – the maximum allowed buffer size for the producer.
+   #
+   # After {#deliver_messages} completes, the notification
+   # `deliver_messages.producer.kafka` will be emitted with the following payload:
+   #
+   # * `message_count` – the total number of messages that the producer tried to
+   #   deliver. Note that not all messages may get delivered.
+   # * `delivered_message_count` – the number of messages that were successfully
+   #   delivered.
+   # * `attempts` – the number of attempts made to deliver the messages.
+   #
+   # ## Example
+   #
+   # This is an example of an application which reads lines from stdin and writes them
+   # to Kafka:
+   #
+   #     require "kafka"
+   #
+   #     logger = Logger.new($stderr)
+   #     brokers = ENV.fetch("KAFKA_BROKERS").split(",")
+   #
+   #     # Make sure to create this topic in your Kafka cluster or configure the
+   #     # cluster to auto-create topics.
+   #     topic = "random-messages"
+   #
+   #     kafka = Kafka.new(brokers, client_id: "simple-producer", logger: logger)
+   #     producer = kafka.producer
+   #
+   #     begin
+   #       $stdin.each_with_index do |line, index|
+   #         producer.produce(line, topic: topic)
+   #
+   #         # Deliver messages every 10 lines.
+   #         producer.deliver_messages if index % 10 == 0
+   #       end
+   #     ensure
+   #       # Make sure to send any remaining messages.
+   #       producer.deliver_messages
+   #
+   #       producer.shutdown
+   #     end
+   #
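As an aside, the compression and instrumentation options described in the comments above are typically exercised from the client rather than from this class directly. The following sketch (not part of this file) shows how that might look; it assumes the standard `Kafka::Client#producer` options and that `ActiveSupport::Notifications` is available, which is what ruby-kafka hooks its notifications into when it is loaded:

    require "kafka"
    require "active_support/notifications"

    kafka = Kafka.new(["kafka1:9092"], client_id: "example")

    # Compress message sets with gzip once at least 10 messages are buffered for a partition.
    producer = kafka.producer(compression_codec: :gzip, compression_threshold: 10)

    # Report how many of the buffered messages were actually delivered on each call.
    ActiveSupport::Notifications.subscribe("deliver_messages.producer.kafka") do |*args|
      event = ActiveSupport::Notifications::Event.new(*args)
      payload = event.payload
      puts "delivered #{payload[:delivered_message_count]} of #{payload[:message_count]} " \
           "messages in #{payload[:attempts]} attempt(s)"
    end
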
+   class Producer
+     class AbortTransaction < StandardError; end
+
+     def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:,
+                    required_acks:, max_retries:, retry_backoff:, max_buffer_size:,
+                    max_buffer_bytesize:, partitioner:, interceptors: [])
+       @cluster = cluster
+       @transaction_manager = transaction_manager
+       @logger = TaggedLogger.new(logger)
+       @instrumenter = instrumenter
+       @required_acks = required_acks == :all ? -1 : required_acks
+       @ack_timeout = ack_timeout
+       @max_retries = max_retries
+       @retry_backoff = retry_backoff
+       @max_buffer_size = max_buffer_size
+       @max_buffer_bytesize = max_buffer_bytesize
+       @compressor = compressor
+       @partitioner = partitioner
+       @interceptors = Interceptors.new(interceptors: interceptors, logger: logger)
+
+       # The set of topics that are produced to.
+       @target_topics = Set.new
+
+       # A buffer organized by topic/partition.
+       @buffer = MessageBuffer.new
+
+       # Messages added by `#produce` but not yet assigned a partition.
+       @pending_message_queue = PendingMessageQueue.new
+     end
+
+     def to_s
+       "Producer #{@target_topics.to_a.join(', ')}"
+     end
+
+     # Produces a message to the specified topic. Note that messages are buffered in
+     # the producer until {#deliver_messages} is called.
+     #
+     # ## Partitioning
+     #
+     # There are several options for specifying the partition that the message should
+     # be written to.
+     #
+     # The simplest option is to not specify a message key, partition key, or
+     # partition number, in which case the message will be assigned a partition at
+     # random.
+     #
+     # You can also specify the `partition` parameter yourself. This requires you to
+     # know which partitions are available, however. Oftentimes the best option is
+     # to specify the `partition_key` parameter: messages with the same partition
+     # key will always be assigned to the same partition, as long as the number of
+     # partitions doesn't change. You can also omit the partition key and specify
+     # a message key instead. The message key is part of the message payload, and
+     # so can carry semantic value--whether you want to have the message key double
+     # as a partition key is up to you.
+     #
+     # @param value [String] the message data.
+     # @param key [String] the message key.
+     # @param headers [Hash<String, String>] the headers for the message.
+     # @param topic [String] the topic that the message should be written to.
+     # @param partition [Integer] the partition that the message should be written to.
+     # @param partition_key [String] the key that should be used to assign a partition.
+     # @param create_time [Time] the timestamp that should be set on the message.
+     #
+     # @raise [BufferOverflow] if the maximum buffer size has been reached.
+     # @return [nil]
+     def produce(value, key: nil, headers: {}, topic:, partition: nil, partition_key: nil, create_time: Time.now)
+       # We want to fail fast if `topic` isn't a String
+       topic = topic.to_str
+
+       message = @interceptors.call(PendingMessage.new(
+         value: value && value.to_s,
+         key: key && key.to_s,
+         headers: headers,
+         topic: topic,
+         partition: partition && Integer(partition),
+         partition_key: partition_key && partition_key.to_s,
+         create_time: create_time
+       ))
+
+       if buffer_size >= @max_buffer_size
+         buffer_overflow topic,
+           "Cannot produce to #{topic}, max buffer size (#{@max_buffer_size} messages) reached"
+       end
+
+       if buffer_bytesize + message.bytesize >= @max_buffer_bytesize
+         buffer_overflow topic,
+           "Cannot produce to #{topic}, max buffer bytesize (#{@max_buffer_bytesize} bytes) reached"
+       end
+
+       # If the producer is in transactional mode, messages may only be produced
+       # while a transaction is in progress.
+       if @transaction_manager.transactional? && !@transaction_manager.in_transaction?
+         raise "Cannot produce to #{topic}: You must trigger begin_transaction before producing messages"
+       end
+
+       @target_topics.add(topic)
+       @pending_message_queue.write(message)
+
+       @instrumenter.instrument("produce_message.producer", {
+         value: value,
+         key: key,
+         topic: topic,
+         create_time: create_time,
+         message_size: message.bytesize,
+         buffer_size: buffer_size,
+         max_buffer_size: @max_buffer_size,
+       })
+
+       nil
+     end
+
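For illustration, a short sketch of calling `#produce` with the documented keyword arguments and then flushing the buffer; the topic name and values are placeholders:

    producer.produce(
      "signup completed",            # message value
      key: "user-42",                # stored in the message payload
      partition_key: "user-42",      # only used to pick a partition
      headers: { "source" => "web" },
      topic: "user-events"
    )

    producer.deliver_messages
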
+     # Sends all buffered messages to the Kafka brokers.
+     #
+     # Depending on the value of `required_acks` used when initializing the producer,
+     # this call may block until the specified number of replicas have acknowledged
+     # the writes. The `ack_timeout` setting places an upper bound on the amount of
+     # time the call will block before failing.
+     #
+     # @raise [DeliveryFailed] if not all messages could be successfully sent.
+     # @return [nil]
+     def deliver_messages
+       # There's no need to do anything if the buffer is empty.
+       return if buffer_size == 0
+
+       @instrumenter.instrument("deliver_messages.producer") do |notification|
+         message_count = buffer_size
+
+         notification[:message_count] = message_count
+         notification[:attempts] = 0
+
+         begin
+           deliver_messages_with_retries(notification)
+         ensure
+           notification[:delivered_message_count] = message_count - buffer_size
+         end
+       end
+     end
+
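Because `#deliver_messages` raises {DeliveryFailed} once `max_retries` is exhausted, callers usually rescue it and decide whether to retry later or drop the remaining buffer. A minimal sketch, assuming a `producer` built as in the examples above:

    begin
      producer.deliver_messages
    rescue Kafka::DeliveryFailed => e
      # Undelivered messages stay in the producer's buffer; either call
      # `deliver_messages` again later or discard them explicitly.
      $stderr.puts "Delivery failed: #{e.message}"
      producer.clear_buffer
    end
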
+     # Returns the number of messages currently held in the buffer.
+     #
+     # @return [Integer] buffer size.
+     def buffer_size
+       @pending_message_queue.size + @buffer.size
+     end
+
+     def buffer_bytesize
+       @pending_message_queue.bytesize + @buffer.bytesize
+     end
+
+     # Deletes all buffered messages.
+     #
+     # @return [nil]
+     def clear_buffer
+       @buffer.clear
+       @pending_message_queue.clear
+     end
+
+     # Closes all connections to the brokers.
+     #
+     # @return [nil]
+     def shutdown
+       @transaction_manager.close
+       @cluster.disconnect
+     end
+
+     # Initializes the producer for future transactions. This method should be
+     # called once, before any transactions are started.
+     #
+     # @return [nil]
+     def init_transactions
+       @transaction_manager.init_transactions
+     end
+
+     # Marks the beginning of a transaction. This method transitions the
+     # transaction state to IN_TRANSACTION.
+     #
+     # Producing is only allowed while the transaction is in this state. The
+     # records are persisted by the Kafka brokers, but are not visible to
+     # consumers until {#commit_transaction} is called. If the transaction is not
+     # committed within the transaction timeout, it is considered aborted.
+     #
+     # @return [nil]
+     def begin_transaction
+       @transaction_manager.begin_transaction
+     end
+
+     # Commits the pending transaction and marks all the produced records as
+     # committed. After that, they are visible to the consumers.
+     #
+     # This method can only be called while the current transaction is in the
+     # IN_TRANSACTION state.
+     #
+     # @return [nil]
+     def commit_transaction
+       @transaction_manager.commit_transaction
+     end
+
+     # Aborts the pending transaction and marks all the produced records as
+     # aborted. The records will be discarded by the brokers and will not be seen
+     # by consumers, unless the consumers are configured to read uncommitted messages.
+     #
+     # This method can only be called while the current transaction is in the
+     # IN_TRANSACTION state.
+     #
+     # @return [nil]
+     def abort_transaction
+       @transaction_manager.abort_transaction
+     end
+
+     # Sends the batch's last offset to the consumer group coordinator and marks
+     # this offset as part of the current transaction. The offset will be
+     # considered committed only if the transaction is committed successfully.
+     #
+     # This method should be used when you need to batch consumed and produced
+     # messages together, typically in a consume-transform-produce pattern. The
+     # specified `group_id` should therefore be the same as the `group_id`
+     # configured on the used consumer.
+     #
+     # @return [nil]
+     def send_offsets_to_transaction(batch:, group_id:)
+       @transaction_manager.send_offsets_to_txn(offsets: { batch.topic => { batch.partition => { offset: batch.last_offset + 1, leader_epoch: batch.leader_epoch } } }, group_id: group_id)
+     end
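The consume-transform-produce pattern mentioned above could be sketched roughly as follows. This is illustrative only: it assumes a producer created with the transactional options (`kafka.producer(transactional: true, transactional_id: "...")`), the standard ruby-kafka consumer API, and the `#transaction` helper defined just below; topic names are placeholders:

    group_id = "transform-group"
    consumer = kafka.consumer(group_id: group_id)
    consumer.subscribe("input-topic")

    producer.init_transactions

    consumer.each_batch(automatically_mark_as_processed: false) do |batch|
      producer.transaction do
        batch.messages.each do |message|
          producer.produce(message.value.upcase, topic: "output-topic")
        end
        producer.deliver_messages
        # Commit the consumed offsets atomically with the produced records.
        producer.send_offsets_to_transaction(batch: batch, group_id: group_id)
      end
    end
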
+
+     # Syntactic sugar to enable easier transaction usage. It performs the following steps:
+     #
+     # - Starts the transaction (with Producer#begin_transaction)
+     # - Yields the given block
+     # - Commits the transaction (with Producer#commit_transaction)
+     #
+     # If the block raises an exception, the transaction is automatically aborted
+     # *before* the exception is bubbled up.
+     #
+     # If the block raises the Kafka::Producer::AbortTransaction indicator exception,
+     # the transaction is aborted silently, without re-raising that exception.
+     #
+     # @return [nil]
+     def transaction
+       raise 'This method requires a block' unless block_given?
+       begin_transaction
+       yield
+       commit_transaction
+     rescue Kafka::Producer::AbortTransaction
+       abort_transaction
+     rescue
+       abort_transaction
+       raise
+     end
+
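A minimal sketch of the `#transaction` helper, including an early abort via the AbortTransaction marker exception; `order_invalid?` is a hypothetical application check, not part of this library:

    producer.transaction do
      producer.produce("charged $10", topic: "payments")
      producer.deliver_messages

      # Raising the marker exception aborts the transaction without re-raising it.
      raise Kafka::Producer::AbortTransaction if order_invalid?
    end
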
+     private
+
+     def deliver_messages_with_retries(notification)
+       attempt = 0
+
+       @cluster.add_target_topics(@target_topics)
+
+       operation = ProduceOperation.new(
+         cluster: @cluster,
+         transaction_manager: @transaction_manager,
+         buffer: @buffer,
+         required_acks: @required_acks,
+         ack_timeout: @ack_timeout,
+         compressor: @compressor,
+         logger: @logger,
+         instrumenter: @instrumenter,
+       )
+
+       loop do
+         attempt += 1
+
+         notification[:attempts] = attempt
+
+         begin
+           @cluster.refresh_metadata_if_necessary!
+         rescue ConnectionError => e
+           raise DeliveryFailed.new(e, buffer_messages)
+         end
+
+         assign_partitions!
+         operation.execute
+
+         if @required_acks.zero?
+           # No response is returned by the brokers, so we can't know which messages
+           # have been successfully written. Our only option is to assume that they all
+           # have.
+           @buffer.clear
+         end
+
+         if buffer_size.zero?
+           break
+         elsif attempt <= @max_retries
+           @logger.warn "Failed to send all messages to #{pretty_partitions}; attempting retry #{attempt} of #{@max_retries} after #{@retry_backoff}s"
+
+           sleep @retry_backoff
+         else
+           @logger.error "Failed to send all messages to #{pretty_partitions}; keeping remaining messages in buffer"
+           break
+         end
+       end
+
+       unless @pending_message_queue.empty?
+         # Mark the cluster as stale in order to force a cluster metadata refresh.
+         @cluster.mark_as_stale!
+         raise DeliveryFailed.new("Failed to assign partitions to #{@pending_message_queue.size} messages", buffer_messages)
+       end
+
+       unless @buffer.empty?
+         raise DeliveryFailed.new("Failed to send messages to #{pretty_partitions}", buffer_messages)
+       end
+     end
+
+     def pretty_partitions
+       @buffer.map {|topic, partition, _| "#{topic}/#{partition}" }.join(", ")
+     end
+
+     def assign_partitions!
+       failed_messages = []
+       topics_with_failures = Set.new
+
+       @pending_message_queue.each do |message|
+         partition = message.partition
+
+         begin
+           # If a message for a topic fails to receive a partition, all subsequent
+           # messages for that topic should be retried to preserve ordering.
+           if topics_with_failures.include?(message.topic)
+             failed_messages << message
+             next
+           end
+
+           if partition.nil?
+             partition_count = @cluster.partitions_for(message.topic).count
+             partition = @partitioner.call(partition_count, message)
+           end
+
+           @buffer.write(
+             value: message.value,
+             key: message.key,
+             headers: message.headers,
+             topic: message.topic,
+             partition: partition,
+             create_time: message.create_time,
+           )
+         rescue Kafka::Error => e
+           @instrumenter.instrument("topic_error.producer", {
+             topic: message.topic,
+             exception: [e.class.to_s, e.message],
+           })
+
+           topics_with_failures << message.topic
+           failed_messages << message
+         end
+       end
+
+       if failed_messages.any?
+         failed_messages.group_by(&:topic).each do |topic, messages|
+           @logger.error "Failed to assign partitions to #{messages.count} messages in #{topic}"
+         end
+
+         @cluster.mark_as_stale!
+       end
+
+       @pending_message_queue.replace(failed_messages)
+     end
+
+     def buffer_messages
+       messages = []
+
+       @pending_message_queue.each do |message|
+         messages << message
+       end
+
+       @buffer.each do |topic, partition, messages_for_partition|
+         messages_for_partition.each do |message|
+           messages << PendingMessage.new(
+             value: message.value,
+             key: message.key,
+             headers: message.headers,
+             topic: topic,
+             partition: partition,
+             partition_key: nil,
+             create_time: message.create_time
+           )
+         end
+       end
+
+       messages
+     end
+
+     def buffer_overflow(topic, message)
+       @instrumenter.instrument("buffer_overflow.producer", {
+         topic: topic,
+       })
+
+       raise BufferOverflow, message
+     end
+   end
+ end
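As `assign_partitions!` shows, the partitioner is simply an object responding to `call(partition_count, message)`, where the message exposes `partition_key` and `key` (see PendingMessage). A rough sketch of a custom partitioner follows; note that wiring it in through a `partitioner:` option on the client is how upstream ruby-kafka 1.x does it, and is assumed rather than confirmed for this fork:

    require "zlib"

    class HashedPartitioner
      # Route messages to a partition using a CRC32 of the partition key,
      # falling back to the message key when no partition key is set.
      def call(partition_count, message)
        key = message.partition_key || message.key || ""
        Zlib.crc32(key) % partition_count
      end
    end

    kafka = Kafka.new(["kafka1:9092"], partitioner: HashedPartitioner.new)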