ruby-kafka-aws-iam 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +393 -0
  3. data/.github/workflows/stale.yml +19 -0
  4. data/.gitignore +13 -0
  5. data/.readygo +1 -0
  6. data/.rspec +3 -0
  7. data/.rubocop.yml +44 -0
  8. data/.ruby-version +1 -0
  9. data/.yardopts +3 -0
  10. data/CHANGELOG.md +314 -0
  11. data/Gemfile +5 -0
  12. data/ISSUE_TEMPLATE.md +23 -0
  13. data/LICENSE.txt +176 -0
  14. data/Procfile +2 -0
  15. data/README.md +1356 -0
  16. data/Rakefile +8 -0
  17. data/benchmarks/message_encoding.rb +23 -0
  18. data/bin/console +8 -0
  19. data/bin/setup +5 -0
  20. data/docker-compose.yml +39 -0
  21. data/examples/consumer-group.rb +35 -0
  22. data/examples/firehose-consumer.rb +64 -0
  23. data/examples/firehose-producer.rb +54 -0
  24. data/examples/simple-consumer.rb +34 -0
  25. data/examples/simple-producer.rb +42 -0
  26. data/examples/ssl-producer.rb +44 -0
  27. data/lib/kafka/async_producer.rb +297 -0
  28. data/lib/kafka/broker.rb +217 -0
  29. data/lib/kafka/broker_info.rb +16 -0
  30. data/lib/kafka/broker_pool.rb +41 -0
  31. data/lib/kafka/broker_uri.rb +43 -0
  32. data/lib/kafka/client.rb +838 -0
  33. data/lib/kafka/cluster.rb +513 -0
  34. data/lib/kafka/compression.rb +45 -0
  35. data/lib/kafka/compressor.rb +86 -0
  36. data/lib/kafka/connection.rb +228 -0
  37. data/lib/kafka/connection_builder.rb +33 -0
  38. data/lib/kafka/consumer.rb +642 -0
  39. data/lib/kafka/consumer_group/assignor.rb +63 -0
  40. data/lib/kafka/consumer_group.rb +231 -0
  41. data/lib/kafka/crc32_hash.rb +15 -0
  42. data/lib/kafka/datadog.rb +420 -0
  43. data/lib/kafka/digest.rb +22 -0
  44. data/lib/kafka/fetch_operation.rb +115 -0
  45. data/lib/kafka/fetched_batch.rb +58 -0
  46. data/lib/kafka/fetched_batch_generator.rb +120 -0
  47. data/lib/kafka/fetched_message.rb +48 -0
  48. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  49. data/lib/kafka/fetcher.rb +224 -0
  50. data/lib/kafka/gzip_codec.rb +34 -0
  51. data/lib/kafka/heartbeat.rb +25 -0
  52. data/lib/kafka/instrumenter.rb +38 -0
  53. data/lib/kafka/interceptors.rb +33 -0
  54. data/lib/kafka/lz4_codec.rb +27 -0
  55. data/lib/kafka/message_buffer.rb +87 -0
  56. data/lib/kafka/murmur2_hash.rb +17 -0
  57. data/lib/kafka/offset_manager.rb +259 -0
  58. data/lib/kafka/partitioner.rb +40 -0
  59. data/lib/kafka/pause.rb +92 -0
  60. data/lib/kafka/pending_message.rb +29 -0
  61. data/lib/kafka/pending_message_queue.rb +41 -0
  62. data/lib/kafka/produce_operation.rb +205 -0
  63. data/lib/kafka/producer.rb +528 -0
  64. data/lib/kafka/prometheus.rb +316 -0
  65. data/lib/kafka/protocol/add_offsets_to_txn_request.rb +29 -0
  66. data/lib/kafka/protocol/add_offsets_to_txn_response.rb +21 -0
  67. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  68. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  69. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  70. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  71. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  72. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  73. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  74. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  75. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  76. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  77. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  78. data/lib/kafka/protocol/decoder.rb +175 -0
  79. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  80. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  81. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  82. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  83. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  84. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  85. data/lib/kafka/protocol/encoder.rb +184 -0
  86. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  87. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  88. data/lib/kafka/protocol/fetch_request.rb +70 -0
  89. data/lib/kafka/protocol/fetch_response.rb +136 -0
  90. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  91. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  92. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  93. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  94. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  95. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  96. data/lib/kafka/protocol/join_group_request.rb +47 -0
  97. data/lib/kafka/protocol/join_group_response.rb +41 -0
  98. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  99. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  100. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  101. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  102. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  103. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  104. data/lib/kafka/protocol/member_assignment.rb +42 -0
  105. data/lib/kafka/protocol/message.rb +172 -0
  106. data/lib/kafka/protocol/message_set.rb +55 -0
  107. data/lib/kafka/protocol/metadata_request.rb +31 -0
  108. data/lib/kafka/protocol/metadata_response.rb +185 -0
  109. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  110. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  111. data/lib/kafka/protocol/offset_fetch_request.rb +38 -0
  112. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  113. data/lib/kafka/protocol/produce_request.rb +94 -0
  114. data/lib/kafka/protocol/produce_response.rb +63 -0
  115. data/lib/kafka/protocol/record.rb +88 -0
  116. data/lib/kafka/protocol/record_batch.rb +223 -0
  117. data/lib/kafka/protocol/request_message.rb +26 -0
  118. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  119. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  120. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  121. data/lib/kafka/protocol/sync_group_response.rb +26 -0
  122. data/lib/kafka/protocol/txn_offset_commit_request.rb +46 -0
  123. data/lib/kafka/protocol/txn_offset_commit_response.rb +47 -0
  124. data/lib/kafka/protocol.rb +225 -0
  125. data/lib/kafka/round_robin_assignment_strategy.rb +52 -0
  126. data/lib/kafka/sasl/awsmskiam.rb +128 -0
  127. data/lib/kafka/sasl/gssapi.rb +76 -0
  128. data/lib/kafka/sasl/oauth.rb +64 -0
  129. data/lib/kafka/sasl/plain.rb +39 -0
  130. data/lib/kafka/sasl/scram.rb +180 -0
  131. data/lib/kafka/sasl_authenticator.rb +73 -0
  132. data/lib/kafka/snappy_codec.rb +29 -0
  133. data/lib/kafka/socket_with_timeout.rb +96 -0
  134. data/lib/kafka/ssl_context.rb +66 -0
  135. data/lib/kafka/ssl_socket_with_timeout.rb +192 -0
  136. data/lib/kafka/statsd.rb +296 -0
  137. data/lib/kafka/tagged_logger.rb +77 -0
  138. data/lib/kafka/transaction_manager.rb +306 -0
  139. data/lib/kafka/transaction_state_machine.rb +72 -0
  140. data/lib/kafka/version.rb +5 -0
  141. data/lib/kafka/zstd_codec.rb +27 -0
  142. data/lib/kafka.rb +373 -0
  143. data/lib/ruby-kafka.rb +5 -0
  144. data/ruby-kafka.gemspec +54 -0
  145. metadata +520 -0
data/lib/kafka/producer.rb
@@ -0,0 +1,528 @@
+ # frozen_string_literal: true
+
+ require "set"
+ require "kafka/partitioner"
+ require "kafka/message_buffer"
+ require "kafka/produce_operation"
+ require "kafka/pending_message_queue"
+ require "kafka/pending_message"
+ require "kafka/compressor"
+ require "kafka/interceptors"
+
+ module Kafka
+   # Allows sending messages to a Kafka cluster.
+   #
+   # Typically you won't instantiate this class yourself, but rather have {Kafka::Client}
+   # do it for you, e.g.
+   #
+   #     # Will instantiate Kafka::Client
+   #     kafka = Kafka.new(["kafka1:9092", "kafka2:9092"])
+   #
+   #     # Will instantiate Kafka::Producer
+   #     producer = kafka.producer
+   #
+   # This is done in order to share a logger as well as a pool of broker connections across
+   # different producers. This also means that you don't need to pass the `cluster` and
+   # `logger` options to `#producer`. See {#initialize} for the list of other options
+   # you can pass in.
+   #
+   # ## Buffering
+   #
+   # The producer buffers pending messages until {#deliver_messages} is called. Note that there is
+   # a maximum buffer size (default is 1,000 messages) and writing messages after the
+   # buffer has reached this size will result in a BufferOverflow exception. Make sure
+   # to periodically call {#deliver_messages} or set `max_buffer_size` to an appropriate value.
+   #
+   # Buffering messages and sending them in batches greatly improves performance, so
+   # try to avoid sending messages after every write. The tradeoff between throughput and
+   # message delays depends on your use case.
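+   #
+   # As a rough sketch (the `messages` collection and the `events` topic are placeholders,
+   # and `kafka` is the client from the snippet above), a producer with a larger buffer
+   # that is flushed periodically might look like this:
+   #
+   #     producer = kafka.producer(max_buffer_size: 10_000)
+   #
+   #     messages.each_with_index do |msg, i|
+   #       producer.produce(msg, topic: "events")
+   #       producer.deliver_messages if (i % 100).zero?
+   #     end
+   #
+   #     # Flush whatever is left in the buffer.
+   #     producer.deliver_messages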
+   #
+   # ## Error Handling and Retries
+   #
+   # The design of the error handling is based on having a {MessageBuffer} hold messages
+   # for all topics/partitions. Whenever we want to send messages to the cluster, we
+   # group the buffered messages by the broker they need to be sent to and fire off a
+   # request to each broker. A request can be a partial success, so we go through the
+   # response and inspect the error code for each partition that we wrote to. If the
+   # write to a given partition was successful, we clear the corresponding messages
+   # from the buffer -- otherwise, we log the error and keep the messages in the buffer.
+   #
+   # After this, we check if the buffer is empty. If it is, we're all done. If it's
+   # not, we do another round of requests, this time with just the remaining messages.
+   # We do this for as long as `max_retries` permits.
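+   #
+   # For durability-sensitive workloads you can require acknowledgement from all
+   # in-sync replicas and tune the retry behaviour. A minimal sketch, again assuming
+   # the `kafka` client above:
+   #
+   #     producer = kafka.producer(
+   #       required_acks: :all,  # wait for all in-sync replicas
+   #       max_retries: 3,       # rounds of redelivery before giving up
+   #       retry_backoff: 5      # seconds to wait between attempts
+   #     )
+   #
+   #     begin
+   #       producer.deliver_messages
+   #     rescue Kafka::DeliveryFailed
+   #       # Messages that could not be delivered remain in the buffer and can be
+   #       # retried with another `deliver_messages` call, or discarded with `clear_buffer`.
+   #     end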
+   #
+   # ## Compression
+   #
+   # Depending on what kind of data you produce, enabling compression may yield improved
+   # bandwidth and space usage. Compression in Kafka is done on entire message sets
+   # rather than on individual messages. This improves the compression rate and generally
+   # means that compression works better the larger your buffers get, since the message
+   # sets will be larger by the time they're compressed.
+   #
+   # Since many workloads have variations in throughput and distribution across partitions,
+   # it's possible to configure a threshold for when to enable compression by setting
+   # `compression_threshold`. Only if the defined number of messages are buffered for a
+   # partition will the messages be compressed.
+   #
+   # Compression is enabled by passing the `compression_codec` parameter with the
+   # name of one of the algorithms allowed by Kafka:
+   #
+   # * `:snappy` for [Snappy](http://google.github.io/snappy/) compression.
+   # * `:gzip` for [gzip](https://en.wikipedia.org/wiki/Gzip) compression.
+   # * `:lz4` for [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) compression.
+   # * `:zstd` for [zstd](https://facebook.github.io/zstd/) compression.
+   #
+   # By default, all message sets will be compressed if you specify a compression
+   # codec. To increase the compression threshold, set `compression_threshold` to
+   # an integer value higher than one.
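+   #
+   # For example, a gzip-compressing producer that only compresses once at least ten
+   # messages are buffered for a partition could be built roughly like this (assuming
+   # the `kafka` client above; non-gzip codecs additionally need their codec gem
+   # installed, e.g. `snappy` or `zstd-ruby`):
+   #
+   #     producer = kafka.producer(
+   #       compression_codec: :gzip,
+   #       compression_threshold: 10
+   #     )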
+   #
+   # ## Instrumentation
+   #
+   # Whenever {#produce} is called, the notification `produce_message.producer.kafka`
+   # will be emitted with the following payload:
+   #
+   # * `value` – the message value.
+   # * `key` – the message key.
+   # * `topic` – the topic that was produced to.
+   # * `buffer_size` – the buffer size after adding the message.
+   # * `max_buffer_size` – the maximum allowed buffer size for the producer.
+   #
+   # After {#deliver_messages} completes, the notification
+   # `deliver_messages.producer.kafka` will be emitted with the following payload:
+   #
+   # * `message_count` – the total number of messages that the producer tried to
+   #   deliver. Note that not all messages may get delivered.
+   # * `delivered_message_count` – the number of messages that were successfully
+   #   delivered.
+   # * `attempts` – the number of attempts made to deliver the messages.
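+   #
+   # These notifications are emitted through ActiveSupport::Notifications when the
+   # `active_support` gem is available, so a subscriber could be sketched like this
+   # (the logging is purely illustrative):
+   #
+   #     ActiveSupport::Notifications.subscribe("deliver_messages.producer.kafka") do |*args|
+   #       event = ActiveSupport::Notifications::Event.new(*args)
+   #       payload = event.payload
+   #
+   #       puts "Delivered #{payload[:delivered_message_count]}/#{payload[:message_count]} " \
+   #            "messages in #{payload[:attempts]} attempt(s)"
+   #     end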
+   #
+   # ## Example
+   #
+   # This is an example of an application which reads lines from stdin and writes them
+   # to Kafka:
+   #
+   #     require "kafka"
+   #
+   #     logger = Logger.new($stderr)
+   #     brokers = ENV.fetch("KAFKA_BROKERS").split(",")
+   #
+   #     # Make sure to create this topic in your Kafka cluster or configure the
+   #     # cluster to auto-create topics.
+   #     topic = "random-messages"
+   #
+   #     kafka = Kafka.new(brokers, client_id: "simple-producer", logger: logger)
+   #     producer = kafka.producer
+   #
+   #     begin
+   #       $stdin.each_with_index do |line, index|
+   #         producer.produce(line, topic: topic)
+   #
+   #         # Deliver messages every 10 lines.
+   #         producer.deliver_messages if index % 10 == 0
+   #       end
+   #     ensure
+   #       # Make sure to send any remaining messages.
+   #       producer.deliver_messages
+   #
+   #       producer.shutdown
+   #     end
+   #
+   class Producer
+     class AbortTransaction < StandardError; end
+
+     def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:,
+                    required_acks:, max_retries:, retry_backoff:, max_buffer_size:,
+                    max_buffer_bytesize:, partitioner:, interceptors: [])
+       @cluster = cluster
+       @transaction_manager = transaction_manager
+       @logger = TaggedLogger.new(logger)
+       @instrumenter = instrumenter
+       @required_acks = required_acks == :all ? -1 : required_acks
+       @ack_timeout = ack_timeout
+       @max_retries = max_retries
+       @retry_backoff = retry_backoff
+       @max_buffer_size = max_buffer_size
+       @max_buffer_bytesize = max_buffer_bytesize
+       @compressor = compressor
+       @partitioner = partitioner
+       @interceptors = Interceptors.new(interceptors: interceptors, logger: logger)
+
+       # The set of topics that are produced to.
+       @target_topics = Set.new
+
+       # A buffer organized by topic/partition.
+       @buffer = MessageBuffer.new
+
+       # Messages added by `#produce` but not yet assigned a partition.
+       @pending_message_queue = PendingMessageQueue.new
+     end
+
+     def to_s
+       "Producer #{@target_topics.to_a.join(', ')}"
+     end
+
+     # Produces a message to the specified topic. Note that messages are buffered in
+     # the producer until {#deliver_messages} is called.
+     #
+     # ## Partitioning
+     #
+     # There are several options for specifying the partition that the message should
+     # be written to.
+     #
+     # The simplest option is to not specify a message key, partition key, or
+     # partition number, in which case the message will be assigned a partition at
+     # random.
+     #
+     # You can also specify the `partition` parameter yourself. This requires you to
+     # know which partitions are available, however. Oftentimes the best option is
+     # to specify the `partition_key` parameter: messages with the same partition
+     # key will always be assigned to the same partition, as long as the number of
+     # partitions doesn't change. You can also omit the partition key and specify
+     # a message key instead. The message key is part of the message payload, and
+     # so can carry semantic value--whether you want to have the message key double
+     # as a partition key is up to you.
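+     #
+     # As an illustrative sketch (the `event`, `event_id` and `user_id` variables and
+     # the topic name are placeholders), events for the same user can be routed to the
+     # same partition while keeping a separate message key:
+     #
+     #     producer.produce(
+     #       event.to_json,
+     #       key: event_id,
+     #       partition_key: user_id,
+     #       topic: "user-events"
+     #     )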
+     #
+     # @param value [String] the message data.
+     # @param key [String] the message key.
+     # @param headers [Hash<String, String>] the headers for the message.
+     # @param topic [String] the topic that the message should be written to.
+     # @param partition [Integer] the partition that the message should be written to.
+     # @param partition_key [String] the key that should be used to assign a partition.
+     # @param create_time [Time] the timestamp that should be set on the message.
+     #
+     # @raise [BufferOverflow] if the maximum buffer size has been reached.
+     # @return [nil]
+     def produce(value, key: nil, headers: {}, topic:, partition: nil, partition_key: nil, create_time: Time.now)
+       # We want to fail fast if `topic` isn't a String.
+       topic = topic.to_str
+
+       message = @interceptors.call(PendingMessage.new(
+         value: value && value.to_s,
+         key: key && key.to_s,
+         headers: headers,
+         topic: topic,
+         partition: partition && Integer(partition),
+         partition_key: partition_key && partition_key.to_s,
+         create_time: create_time
+       ))
+
+       if buffer_size >= @max_buffer_size
+         buffer_overflow topic,
+           "Cannot produce to #{topic}, max buffer size (#{@max_buffer_size} messages) reached"
+       end
+
+       if buffer_bytesize + message.bytesize >= @max_buffer_bytesize
+         buffer_overflow topic,
+           "Cannot produce to #{topic}, max buffer bytesize (#{@max_buffer_bytesize} bytes) reached"
+       end
+
+       # If the producer is in transactional mode, messages may only be produced
+       # while a transaction is in progress.
+       if @transaction_manager.transactional? && !@transaction_manager.in_transaction?
+         raise "Cannot produce to #{topic}: You must trigger begin_transaction before producing messages"
+       end
+
+       @target_topics.add(topic)
+       @pending_message_queue.write(message)
+
+       @instrumenter.instrument("produce_message.producer", {
+         value: value,
+         key: key,
+         topic: topic,
+         create_time: create_time,
+         message_size: message.bytesize,
+         buffer_size: buffer_size,
+         max_buffer_size: @max_buffer_size,
+       })
+
+       nil
+     end
+
+     # Sends all buffered messages to the Kafka brokers.
+     #
+     # Depending on the value of `required_acks` used when initializing the producer,
+     # this call may block until the specified number of replicas have acknowledged
+     # the writes. The `ack_timeout` setting places an upper bound on the amount of
+     # time the call will block before failing.
+     #
+     # @raise [DeliveryFailed] if not all messages could be successfully sent.
+     # @return [nil]
+     def deliver_messages
+       # There's no need to do anything if the buffer is empty.
+       return if buffer_size == 0
+
+       @instrumenter.instrument("deliver_messages.producer") do |notification|
+         message_count = buffer_size
+
+         notification[:message_count] = message_count
+         notification[:attempts] = 0
+
+         begin
+           deliver_messages_with_retries(notification)
+         ensure
+           notification[:delivered_message_count] = message_count - buffer_size
+         end
+       end
+     end
+
+     # Returns the number of messages currently held in the buffer.
+     #
+     # @return [Integer] buffer size.
+     def buffer_size
+       @pending_message_queue.size + @buffer.size
+     end
+
+     def buffer_bytesize
+       @pending_message_queue.bytesize + @buffer.bytesize
+     end
+
+     # Deletes all buffered messages.
+     #
+     # @return [nil]
+     def clear_buffer
+       @buffer.clear
+       @pending_message_queue.clear
+     end
+
+     # Closes all connections to the brokers.
+     #
+     # @return [nil]
+     def shutdown
+       @transaction_manager.close
+       @cluster.disconnect
+     end
+
+     # Initializes the producer so it is ready for future transactions. This method
+     # should be called once, before any transactions are started.
+     #
+     # @return [nil]
+     def init_transactions
+       @transaction_manager.init_transactions
+     end
+
+     # Marks the beginning of a transaction. This method transitions the transaction
+     # state to IN_TRANSACTION.
+     #
+     # All produce operations can only be executed while the transaction is in this
+     # state. The records are persisted by the Kafka brokers, but are not visible to
+     # consumers until {#commit_transaction} is called. If the transaction is not
+     # committed within the timeout period, it times out and is considered aborted.
+     #
+     # @return [nil]
+     def begin_transaction
+       @transaction_manager.begin_transaction
+     end
+
+     # Commits the pending transaction and marks all produced records as committed.
+     # After that, they are visible to consumers.
+     #
+     # This method can only be called if the current transaction is in the
+     # IN_TRANSACTION state.
+     #
+     # @return [nil]
+     def commit_transaction
+       @transaction_manager.commit_transaction
+     end
+
+     # Aborts the pending transaction and marks all produced records as aborted. The
+     # records will be wiped out by the brokers, and consumers won't see them unless
+     # they are configured to read uncommitted messages.
+     #
+     # This method can only be called if the current transaction is in the
+     # IN_TRANSACTION state.
+     #
+     # @return [nil]
+     def abort_transaction
+       @transaction_manager.abort_transaction
+     end
+
+     # Sends the batch's last offset to the consumer group coordinator, and also marks
+     # this offset as part of the current transaction. This offset will be considered
+     # committed only if the transaction is committed successfully.
+     #
+     # This method should be used when you need to batch consumed and produced messages
+     # together, typically in a consume-transform-produce pattern. Thus, the specified
+     # `group_id` should be the same as the `group_id` configured for the consumer
+     # being used.
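+     #
+     # A rough consume-transform-produce sketch (the topic names and the `transform`
+     # helper are placeholders, and `kafka` is a {Kafka::Client}):
+     #
+     #     producer = kafka.producer(transactional: true, transactional_id: "transformer-1")
+     #     producer.init_transactions
+     #
+     #     consumer = kafka.consumer(group_id: "transformer")
+     #     consumer.subscribe("input-topic")
+     #
+     #     consumer.each_batch do |batch|
+     #       producer.transaction do
+     #         batch.messages.each do |message|
+     #           producer.produce(transform(message.value), topic: "output-topic")
+     #         end
+     #         producer.send_offsets_to_transaction(batch: batch, group_id: "transformer")
+     #         producer.deliver_messages
+     #       end
+     #     end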
+     #
+     # @return [nil]
+     def send_offsets_to_transaction(batch:, group_id:)
+       @transaction_manager.send_offsets_to_txn(
+         offsets: {
+           batch.topic => {
+             batch.partition => { offset: batch.last_offset + 1, leader_epoch: batch.leader_epoch }
+           }
+         },
+         group_id: group_id
+       )
+     end
+
+     # Syntactic sugar for easier transaction usage. It performs the following steps:
+     #
+     # - Start the transaction (with Producer#begin_transaction)
+     # - Yield the given block
+     # - Commit the transaction (with Producer#commit_transaction)
+     #
+     # If the block raises an exception, the transaction is automatically aborted
+     # *before* the exception is re-raised.
+     #
+     # If the block raises the Kafka::Producer::AbortTransaction indicator exception,
+     # the transaction is aborted silently, without re-raising that exception.
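+     #
+     # A minimal usage sketch (assuming a producer created with `transactional: true`
+     # and `init_transactions` already called; the topic is a placeholder):
+     #
+     #     producer.transaction do
+     #       producer.produce("hello", topic: "greetings")
+     #       producer.deliver_messages
+     #
+     #       # Raising Kafka::Producer::AbortTransaction here would abort silently;
+     #       # any other exception aborts the transaction and is re-raised.
+     #     end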
+     #
+     # @return [nil]
+     def transaction
+       raise 'This method requires a block' unless block_given?
+       begin_transaction
+       yield
+       commit_transaction
+     rescue Kafka::Producer::AbortTransaction
+       abort_transaction
+     rescue
+       abort_transaction
+       raise
+     end
+
+     private
+
+     def deliver_messages_with_retries(notification)
+       attempt = 0
+
+       @cluster.add_target_topics(@target_topics)
+
+       operation = ProduceOperation.new(
+         cluster: @cluster,
+         transaction_manager: @transaction_manager,
+         buffer: @buffer,
+         required_acks: @required_acks,
+         ack_timeout: @ack_timeout,
+         compressor: @compressor,
+         logger: @logger,
+         instrumenter: @instrumenter,
+       )
+
+       loop do
+         attempt += 1
+
+         notification[:attempts] = attempt
+
+         begin
+           @cluster.refresh_metadata_if_necessary!
+         rescue ConnectionError => e
+           raise DeliveryFailed.new(e, buffer_messages)
+         end
+
+         assign_partitions!
+         operation.execute
+
+         if @required_acks.zero?
+           # No response is returned by the brokers, so we can't know which messages
+           # have been successfully written. Our only option is to assume that they all
+           # have.
+           @buffer.clear
+         end
+
+         if buffer_size.zero?
+           break
+         elsif attempt <= @max_retries
+           @logger.warn "Failed to send all messages to #{pretty_partitions}; attempting retry #{attempt} of #{@max_retries} after #{@retry_backoff}s"
+
+           sleep @retry_backoff
+         else
+           @logger.error "Failed to send all messages to #{pretty_partitions}; keeping remaining messages in buffer"
+           break
+         end
+       end
+
+       unless @pending_message_queue.empty?
+         # Mark the cluster as stale in order to force a cluster metadata refresh.
+         @cluster.mark_as_stale!
+         raise DeliveryFailed.new("Failed to assign partitions to #{@pending_message_queue.size} messages", buffer_messages)
+       end
+
+       unless @buffer.empty?
+         raise DeliveryFailed.new("Failed to send messages to #{pretty_partitions}", buffer_messages)
+       end
+     end
+
+     def pretty_partitions
+       @buffer.map { |topic, partition, _| "#{topic}/#{partition}" }.join(", ")
+     end
+
+     def assign_partitions!
+       failed_messages = []
+       topics_with_failures = Set.new
+
+       @pending_message_queue.each do |message|
+         partition = message.partition
+
+         begin
+           # If a message for a topic fails to receive a partition, all subsequent
+           # messages for that topic should be retried to preserve ordering.
+           if topics_with_failures.include?(message.topic)
+             failed_messages << message
+             next
+           end
+
+           if partition.nil?
+             partition_count = @cluster.partitions_for(message.topic).count
+             partition = @partitioner.call(partition_count, message)
+           end
+
+           @buffer.write(
+             value: message.value,
+             key: message.key,
+             headers: message.headers,
+             topic: message.topic,
+             partition: partition,
+             create_time: message.create_time,
+           )
+         rescue Kafka::Error => e
+           @instrumenter.instrument("topic_error.producer", {
+             topic: message.topic,
+             exception: [e.class.to_s, e.message],
+           })
+
+           topics_with_failures << message.topic
+           failed_messages << message
+         end
+       end
+
+       if failed_messages.any?
+         failed_messages.group_by(&:topic).each do |topic, messages|
+           @logger.error "Failed to assign partitions to #{messages.count} messages in #{topic}"
+         end
+
+         @cluster.mark_as_stale!
+       end
+
+       @pending_message_queue.replace(failed_messages)
+     end
+
+     def buffer_messages
+       messages = []
+
+       @pending_message_queue.each do |message|
+         messages << message
+       end
+
+       @buffer.each do |topic, partition, messages_for_partition|
+         messages_for_partition.each do |message|
+           messages << PendingMessage.new(
+             value: message.value,
+             key: message.key,
+             headers: message.headers,
+             topic: topic,
+             partition: partition,
+             partition_key: nil,
+             create_time: message.create_time
+           )
+         end
+       end
+
+       messages
+     end
+
+     def buffer_overflow(topic, message)
+       @instrumenter.instrument("buffer_overflow.producer", {
+         topic: topic,
+       })
+
+       raise BufferOverflow, message
+     end
+   end
+ end