ruby-kafka-temp-fork 0.0.1

Files changed (144)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +393 -0
  3. data/.github/workflows/stale.yml +19 -0
  4. data/.gitignore +13 -0
  5. data/.readygo +1 -0
  6. data/.rspec +3 -0
  7. data/.rubocop.yml +44 -0
  8. data/.ruby-version +1 -0
  9. data/.yardopts +3 -0
  10. data/CHANGELOG.md +310 -0
  11. data/Gemfile +5 -0
  12. data/ISSUE_TEMPLATE.md +23 -0
  13. data/LICENSE.txt +176 -0
  14. data/Procfile +2 -0
  15. data/README.md +1342 -0
  16. data/Rakefile +8 -0
  17. data/benchmarks/message_encoding.rb +23 -0
  18. data/bin/console +8 -0
  19. data/bin/setup +5 -0
  20. data/docker-compose.yml +39 -0
  21. data/examples/consumer-group.rb +35 -0
  22. data/examples/firehose-consumer.rb +64 -0
  23. data/examples/firehose-producer.rb +54 -0
  24. data/examples/simple-consumer.rb +34 -0
  25. data/examples/simple-producer.rb +42 -0
  26. data/examples/ssl-producer.rb +44 -0
  27. data/lib/kafka.rb +373 -0
  28. data/lib/kafka/async_producer.rb +291 -0
  29. data/lib/kafka/broker.rb +217 -0
  30. data/lib/kafka/broker_info.rb +16 -0
  31. data/lib/kafka/broker_pool.rb +41 -0
  32. data/lib/kafka/broker_uri.rb +43 -0
  33. data/lib/kafka/client.rb +833 -0
  34. data/lib/kafka/cluster.rb +513 -0
  35. data/lib/kafka/compression.rb +45 -0
  36. data/lib/kafka/compressor.rb +86 -0
  37. data/lib/kafka/connection.rb +223 -0
  38. data/lib/kafka/connection_builder.rb +33 -0
  39. data/lib/kafka/consumer.rb +642 -0
  40. data/lib/kafka/consumer_group.rb +231 -0
  41. data/lib/kafka/consumer_group/assignor.rb +63 -0
  42. data/lib/kafka/crc32_hash.rb +15 -0
  43. data/lib/kafka/datadog.rb +420 -0
  44. data/lib/kafka/digest.rb +22 -0
  45. data/lib/kafka/fetch_operation.rb +115 -0
  46. data/lib/kafka/fetched_batch.rb +58 -0
  47. data/lib/kafka/fetched_batch_generator.rb +120 -0
  48. data/lib/kafka/fetched_message.rb +48 -0
  49. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  50. data/lib/kafka/fetcher.rb +224 -0
  51. data/lib/kafka/gzip_codec.rb +34 -0
  52. data/lib/kafka/heartbeat.rb +25 -0
  53. data/lib/kafka/instrumenter.rb +38 -0
  54. data/lib/kafka/interceptors.rb +33 -0
  55. data/lib/kafka/lz4_codec.rb +27 -0
  56. data/lib/kafka/message_buffer.rb +87 -0
  57. data/lib/kafka/murmur2_hash.rb +17 -0
  58. data/lib/kafka/offset_manager.rb +259 -0
  59. data/lib/kafka/partitioner.rb +40 -0
  60. data/lib/kafka/pause.rb +92 -0
  61. data/lib/kafka/pending_message.rb +29 -0
  62. data/lib/kafka/pending_message_queue.rb +41 -0
  63. data/lib/kafka/produce_operation.rb +205 -0
  64. data/lib/kafka/producer.rb +528 -0
  65. data/lib/kafka/prometheus.rb +316 -0
  66. data/lib/kafka/protocol.rb +225 -0
  67. data/lib/kafka/protocol/add_offsets_to_txn_request.rb +29 -0
  68. data/lib/kafka/protocol/add_offsets_to_txn_response.rb +21 -0
  69. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  70. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  71. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  72. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  73. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  74. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  75. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  76. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  77. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  78. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  79. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  80. data/lib/kafka/protocol/decoder.rb +175 -0
  81. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  82. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  83. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  84. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  85. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  86. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  87. data/lib/kafka/protocol/encoder.rb +184 -0
  88. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  89. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  90. data/lib/kafka/protocol/fetch_request.rb +70 -0
  91. data/lib/kafka/protocol/fetch_response.rb +136 -0
  92. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  93. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  94. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  95. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  96. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  97. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  98. data/lib/kafka/protocol/join_group_request.rb +47 -0
  99. data/lib/kafka/protocol/join_group_response.rb +41 -0
  100. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  101. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  102. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  103. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  104. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  105. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  106. data/lib/kafka/protocol/member_assignment.rb +42 -0
  107. data/lib/kafka/protocol/message.rb +172 -0
  108. data/lib/kafka/protocol/message_set.rb +55 -0
  109. data/lib/kafka/protocol/metadata_request.rb +31 -0
  110. data/lib/kafka/protocol/metadata_response.rb +185 -0
  111. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  112. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  113. data/lib/kafka/protocol/offset_fetch_request.rb +38 -0
  114. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  115. data/lib/kafka/protocol/produce_request.rb +94 -0
  116. data/lib/kafka/protocol/produce_response.rb +63 -0
  117. data/lib/kafka/protocol/record.rb +88 -0
  118. data/lib/kafka/protocol/record_batch.rb +223 -0
  119. data/lib/kafka/protocol/request_message.rb +26 -0
  120. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  121. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  122. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  123. data/lib/kafka/protocol/sync_group_response.rb +26 -0
  124. data/lib/kafka/protocol/txn_offset_commit_request.rb +46 -0
  125. data/lib/kafka/protocol/txn_offset_commit_response.rb +47 -0
  126. data/lib/kafka/round_robin_assignment_strategy.rb +52 -0
  127. data/lib/kafka/sasl/gssapi.rb +76 -0
  128. data/lib/kafka/sasl/oauth.rb +64 -0
  129. data/lib/kafka/sasl/plain.rb +39 -0
  130. data/lib/kafka/sasl/scram.rb +180 -0
  131. data/lib/kafka/sasl_authenticator.rb +61 -0
  132. data/lib/kafka/snappy_codec.rb +29 -0
  133. data/lib/kafka/socket_with_timeout.rb +96 -0
  134. data/lib/kafka/ssl_context.rb +66 -0
  135. data/lib/kafka/ssl_socket_with_timeout.rb +188 -0
  136. data/lib/kafka/statsd.rb +296 -0
  137. data/lib/kafka/tagged_logger.rb +77 -0
  138. data/lib/kafka/transaction_manager.rb +306 -0
  139. data/lib/kafka/transaction_state_machine.rb +72 -0
  140. data/lib/kafka/version.rb +5 -0
  141. data/lib/kafka/zstd_codec.rb +27 -0
  142. data/lib/ruby-kafka-temp-fork.rb +5 -0
  143. data/ruby-kafka-temp-fork.gemspec +54 -0
  144. metadata +520 -0
data/lib/kafka/partitioner.rb
@@ -0,0 +1,40 @@
+ # frozen_string_literal: true
+
+ require "kafka/digest"
+
+ module Kafka
+
+   # Assigns partitions to messages.
+   class Partitioner
+     # @param hash_function [Symbol, nil] the algorithm used to compute a message's
+     #   destination partition. Default is :crc32.
+     def initialize(hash_function: nil)
+       @digest = Digest.find_digest(hash_function || :crc32)
+     end
+
+     # Assigns a partition number based on a partition key. If no explicit
+     # partition key is provided, the message key will be used instead.
+     #
+     # If the key is nil, then a random partition is selected. Otherwise, a digest
+     # of the key is used to deterministically find a partition. As long as the
+     # number of partitions doesn't change, the same key will always be assigned
+     # to the same partition.
+     #
+     # @param partition_count [Integer] the number of partitions in the topic.
+     # @param message [Kafka::PendingMessage] the message that should be assigned
+     #   a partition.
+     # @return [Integer] the partition number.
+     def call(partition_count, message)
+       raise ArgumentError if partition_count == 0
+
+       # If no explicit partition key is specified we use the message key instead.
+       key = message.partition_key || message.key
+
+       if key.nil?
+         rand(partition_count)
+       else
+         @digest.hash(key) % partition_count
+       end
+     end
+   end
+ end
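The key property here is determinism: for a fixed partition count, the same partition key always maps to the same partition, while keyless messages are spread at random. A minimal usage sketch, assuming the gem is on the load path (topic and key names are hypothetical):

    require "kafka/partitioner"
    require "kafka/pending_message"

    partitioner = Kafka::Partitioner.new # uses :crc32 by default

    message = Kafka::PendingMessage.new(
      value: "hello",
      key: nil,
      topic: "greetings",
      partition: nil,
      partition_key: "user-42",
      create_time: Time.now
    )

    # Same key and same partition count => same partition on every call.
    partitioner.call(16, message) == partitioner.call(16, message) # => true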
data/lib/kafka/pause.rb
@@ -0,0 +1,92 @@
+ # frozen_string_literal: true
+
+ module Kafka
+   # Manages the pause state of a partition.
+   #
+   # The processing of messages in a partition can be paused, e.g. if there was
+   # an exception during processing. This could be caused by a downstream service
+   # not being available. A typical way of solving such an issue is to back off
+   # for a little while and then try again. In order to do that, _pause_ the
+   # partition.
+   class Pause
+     def initialize(clock: Time)
+       @clock = clock
+       @started_at = nil
+       @pauses = 0
+       @timeout = nil
+       @max_timeout = nil
+       @exponential_backoff = false
+     end
+
+     # Mark the partition as paused.
+     #
+     # If exponential backoff is enabled, each subsequent pause of a partition will
+     # cause a doubling of the actual timeout, i.e. for pause number _n_, the actual
+     # timeout will be _2^(n-1) * timeout_.
+     #
+     # Only when {#reset!} is called is this state cleared.
+     #
+     # @param timeout [nil, Integer] if specified, the partition will automatically
+     #   resume after this many seconds.
+     # @param max_timeout [nil, Integer] if specified, an upper bound on the
+     #   backed-off timeout.
+     # @param exponential_backoff [Boolean] whether to enable exponential timeouts.
+     def pause!(timeout: nil, max_timeout: nil, exponential_backoff: false)
+       @started_at = @clock.now
+       @timeout = timeout
+       @max_timeout = max_timeout
+       @exponential_backoff = exponential_backoff
+       @pauses += 1
+     end
+
+     # Resumes the partition.
+     #
+     # The number of pauses is still retained, and if the partition is paused again
+     # it may be with an exponential backoff.
+     def resume!
+       @started_at = nil
+       @timeout = nil
+       @max_timeout = nil
+     end
+
+     # Whether the partition is currently paused. The pause may have expired, in which
+     # case {#expired?} should be checked as well.
+     def paused?
+       # This is nil if we're not currently paused.
+       !@started_at.nil?
+     end
+
+     def pause_duration
+       if paused?
+         # Use the injected clock rather than Time directly, so a fake clock
+         # can be substituted in tests.
+         @clock.now - @started_at
+       else
+         0
+       end
+     end
+
+     # Whether the pause has expired.
+     def expired?
+       # We never expire the pause if timeout is nil.
+       return false if @timeout.nil?
+
+       # Have we passed the end of the pause duration?
+       @clock.now >= ends_at
+     end
+
+     # Resets the pause state, ensuring that the next pause is not exponential.
+     def reset!
+       @pauses = 0
+     end
+
+     private
+
+     def ends_at
+       # Apply an exponential backoff to the timeout.
+       backoff_factor = @exponential_backoff ? 2**(@pauses - 1) : 1
+       timeout = backoff_factor * @timeout
+
+       # If set, don't allow a timeout longer than max_timeout.
+       timeout = @max_timeout if @max_timeout && timeout > @max_timeout
+
+       @started_at + timeout
+     end
+   end
+ end
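To make the backoff behavior concrete, here is a small sketch based only on the class above. Note that Pause never sleeps; callers are expected to poll #expired? themselves. The timeout values are illustrative:

    pause = Kafka::Pause.new

    # First failure: pause with a 10s base timeout, capped at 60s.
    pause.pause!(timeout: 10, max_timeout: 60, exponential_backoff: true)
    pause.paused?  # => true
    pause.expired? # => false until 10 seconds have passed

    # Each subsequent pause doubles the effective timeout: 10s, 20s, 40s,
    # then capped at 60s by max_timeout.
    pause.resume!
    pause.pause!(timeout: 10, max_timeout: 60, exponential_backoff: true) # 20s

    # Once a batch succeeds, clear the backoff counter.
    pause.reset!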
data/lib/kafka/pending_message.rb
@@ -0,0 +1,29 @@
+ # frozen_string_literal: true
+
+ module Kafka
+   class PendingMessage
+     attr_reader :value, :key, :headers, :topic, :partition, :partition_key, :create_time, :bytesize
+
+     def initialize(value:, key:, headers: {}, topic:, partition:, partition_key:, create_time:)
+       @value = value
+       @key = key
+       @headers = headers
+       @topic = topic
+       @partition = partition
+       @partition_key = partition_key
+       @create_time = create_time
+       @bytesize = key.to_s.bytesize + value.to_s.bytesize
+     end
+
+     def ==(other)
+       @value == other.value &&
+         @key == other.key &&
+         @topic == other.topic &&
+         @headers == other.headers &&
+         @partition == other.partition &&
+         @partition_key == other.partition_key &&
+         @create_time == other.create_time &&
+         @bytesize == other.bytesize
+     end
+   end
+ end
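One detail worth noting: `bytesize` counts only the key and value, not the headers, and this is the figure the producer's `max_buffer_bytesize` accounting (shown later) is based on. A quick sketch with hypothetical values:

    message = Kafka::PendingMessage.new(
      value: "abcd",                     # 4 bytes
      key: "k1",                         # 2 bytes
      headers: { "trace" => "ignored" }, # headers are not counted
      topic: "events",
      partition: nil,
      partition_key: nil,
      create_time: Time.now
    )

    message.bytesize # => 6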
data/lib/kafka/pending_message_queue.rb
@@ -0,0 +1,41 @@
+ # frozen_string_literal: true
+
+ module Kafka
+
+   class PendingMessageQueue
+     attr_reader :size, :bytesize
+
+     def initialize
+       clear
+     end
+
+     def write(message)
+       @messages << message
+       @size += 1
+       @bytesize += message.bytesize
+     end
+
+     def empty?
+       @messages.empty?
+     end
+
+     def clear
+       @messages = []
+       @size = 0
+       @bytesize = 0
+     end
+
+     def replace(messages)
+       clear
+       messages.each {|message| write(message) }
+     end
+
+     # Yields each message in the queue.
+     #
+     # @yieldparam [PendingMessage] message
+     # @return [nil]
+     def each(&block)
+       @messages.each(&block)
+     end
+   end
+ end
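The queue keeps its `size` and `bytesize` counters in step with its contents; `replace` recomputes both by clearing and re-writing, which the producer relies on when it re-queues messages that failed partition assignment. For example, reusing the hypothetical message from the sketch above:

    queue = Kafka::PendingMessageQueue.new
    queue.write(message) # the PendingMessage from the previous sketch
    queue.size           # => 1
    queue.bytesize       # => 6

    queue.replace([])    # drop everything; counters are reset
    queue.empty?         # => true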
data/lib/kafka/produce_operation.rb
@@ -0,0 +1,205 @@
+ # frozen_string_literal: true
+
+ require "kafka/protocol/message_set"
+ require "kafka/protocol/record_batch"
+
+ module Kafka
+   # A produce operation attempts to send all messages in a buffer to the Kafka cluster.
+   # Since topics and partitions are spread among all brokers in a cluster, this usually
+   # involves sending requests to several or all of the brokers.
+   #
+   # ## Instrumentation
+   #
+   # When executing the operation, an `ack_message.producer.kafka` notification will be
+   # emitted for each message that was successfully appended to a topic partition.
+   # The following keys will be found in the payload:
+   #
+   # * `:topic` — the topic that was written to.
+   # * `:partition` — the partition that the message set was appended to.
+   # * `:offset` — the offset of the message in the partition.
+   # * `:key` — the message key.
+   # * `:value` — the message value.
+   # * `:delay` — the time elapsed between producing the message and receiving the acknowledgement.
+   #
+   # In addition to these notifications, a `send_messages.producer.kafka` notification will
+   # be emitted after the operation completes, regardless of whether it succeeds. This
+   # notification will have the following keys:
+   #
+   # * `:message_count` – the total number of messages that the operation tried to
+   #   send. Note that not all messages may get delivered.
+   # * `:sent_message_count` – the number of messages that were successfully sent.
+   #
+   class ProduceOperation
+     def initialize(cluster:, transaction_manager:, buffer:, compressor:, required_acks:, ack_timeout:, logger:, instrumenter:)
+       @cluster = cluster
+       @transaction_manager = transaction_manager
+       @buffer = buffer
+       @required_acks = required_acks
+       @ack_timeout = ack_timeout
+       @compressor = compressor
+       @logger = TaggedLogger.new(logger)
+       @instrumenter = instrumenter
+     end
+
+     def execute
+       if (@transaction_manager.idempotent? || @transaction_manager.transactional?) && @required_acks != -1
+         raise 'You must set the required_acks option to :all to use idempotent / transactional production'
+       end
+
+       if @transaction_manager.transactional? && !@transaction_manager.in_transaction?
+         raise "Produce operation can only be executed in a pending transaction"
+       end
+
+       @instrumenter.instrument("send_messages.producer") do |notification|
+         message_count = @buffer.size
+
+         notification[:message_count] = message_count
+
+         begin
+           if @transaction_manager.idempotent? || @transaction_manager.transactional?
+             @transaction_manager.init_producer_id
+           end
+           send_buffered_messages
+         ensure
+           notification[:sent_message_count] = message_count - @buffer.size
+         end
+       end
+     end
+
+     private
+
+     def send_buffered_messages
+       messages_for_broker = {}
+       topic_partitions = {}
+
+       @buffer.each do |topic, partition, messages|
+         begin
+           broker = @cluster.get_leader(topic, partition)
+
+           @logger.debug "Current leader for #{topic}/#{partition} is node #{broker}"
+
+           topic_partitions[topic] ||= Set.new
+           topic_partitions[topic].add(partition)
+
+           messages_for_broker[broker] ||= MessageBuffer.new
+           messages_for_broker[broker].concat(messages, topic: topic, partition: partition)
+         rescue Kafka::Error => e
+           @logger.error "Could not connect to leader for partition #{topic}/#{partition}: #{e.message}"
+
+           @instrumenter.instrument("topic_error.producer", {
+             topic: topic,
+             exception: [e.class.to_s, e.message],
+           })
+
+           # We can't send the messages right now, so we'll just keep them in the buffer.
+           # We'll mark the cluster as stale in order to force a metadata refresh.
+           @cluster.mark_as_stale!
+         end
+       end
+
+       # Add topics and partitions to the transaction.
+       if @transaction_manager.transactional?
+         @transaction_manager.add_partitions_to_transaction(topic_partitions)
+       end
+
+       messages_for_broker.each do |broker, message_buffer|
+         begin
+           @logger.info "Sending #{message_buffer.size} messages to #{broker}"
+
+           records_for_topics = {}
+
+           message_buffer.each do |topic, partition, records|
+             record_batch = Protocol::RecordBatch.new(
+               records: records,
+               first_sequence: @transaction_manager.next_sequence_for(
+                 topic, partition
+               ),
+               in_transaction: @transaction_manager.transactional?,
+               producer_id: @transaction_manager.producer_id,
+               producer_epoch: @transaction_manager.producer_epoch
+             )
+             records_for_topics[topic] ||= {}
+             records_for_topics[topic][partition] = record_batch
+           end
+
+           response = broker.produce(
+             messages_for_topics: records_for_topics,
+             compressor: @compressor,
+             required_acks: @required_acks,
+             timeout: @ack_timeout * 1000, # Kafka expects the timeout in milliseconds.
+             transactional_id: @transaction_manager.transactional_id
+           )
+
+           handle_response(broker, response, records_for_topics) if response
+         rescue ConnectionError => e
+           @logger.error "Could not connect to broker #{broker}: #{e}"
+
+           # Mark the cluster as stale in order to force a cluster metadata refresh.
+           @cluster.mark_as_stale!
+         end
+       end
+     end
+
+     def handle_response(broker, response, records_for_topics)
+       response.each_partition do |topic_info, partition_info|
+         topic = topic_info.topic
+         partition = partition_info.partition
+         record_batch = records_for_topics[topic][partition]
+         records = record_batch.records
+         ack_time = Time.now
+
+         begin
+           begin
+             Protocol.handle_error(partition_info.error_code)
+           rescue ProtocolError => e
+             @instrumenter.instrument("topic_error.producer", {
+               topic: topic,
+               exception: [e.class.to_s, e.message],
+             })
+
+             raise e
+           end
+
+           if @transaction_manager.idempotent? || @transaction_manager.transactional?
+             @transaction_manager.update_sequence_for(
+               topic, partition, record_batch.first_sequence + record_batch.size
+             )
+           end
+
+           records.each_with_index do |record, index|
+             @instrumenter.instrument("ack_message.producer", {
+               key: record.key,
+               value: record.value,
+               topic: topic,
+               partition: partition,
+               offset: partition_info.offset + index,
+               delay: ack_time - record.create_time,
+             })
+           end
+         rescue Kafka::CorruptMessage
+           @logger.error "Corrupt message when writing to #{topic}/#{partition} on #{broker}"
+         rescue Kafka::UnknownTopicOrPartition
+           @logger.error "Unknown topic or partition #{topic}/#{partition} on #{broker}"
+           @cluster.mark_as_stale!
+         rescue Kafka::LeaderNotAvailable
+           @logger.error "Leader currently not available for #{topic}/#{partition}"
+           @cluster.mark_as_stale!
+         rescue Kafka::NotLeaderForPartition
+           @logger.error "Broker #{broker} not currently leader for #{topic}/#{partition}"
+           @cluster.mark_as_stale!
+         rescue Kafka::RequestTimedOut
+           @logger.error "Timed out while writing to #{topic}/#{partition} on #{broker}"
+         rescue Kafka::NotEnoughReplicas
+           @logger.error "Not enough in-sync replicas for #{topic}/#{partition}"
+         rescue Kafka::NotEnoughReplicasAfterAppend
+           @logger.error "Messages written, but to fewer in-sync replicas than required for #{topic}/#{partition}"
+         else
+           @logger.debug "Successfully appended #{records.count} messages to #{topic}/#{partition} on #{broker}"
+
+           # The messages were successfully written; clear them from the buffer.
+           @buffer.clear_messages(topic: topic, partition: partition)
+         end
+       end
+     end
+   end
+ end
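The notification names in the docs above carry the `.kafka` suffix added by the instrumenter, which emits through ActiveSupport::Notifications when that library is available. A sketch of subscribing to them under that assumption:

    require "active_support/notifications"

    ActiveSupport::Notifications.subscribe("ack_message.producer.kafka") do |_name, _start, _finish, _id, payload|
      # One event per acknowledged message.
      puts "acked #{payload[:topic]}/#{payload[:partition]} " \
           "at offset #{payload[:offset]} after #{payload[:delay]}s"
    end

    ActiveSupport::Notifications.subscribe("send_messages.producer.kafka") do |_name, _start, _finish, _id, payload|
      # One event per produce operation, whether or not it succeeded.
      puts "sent #{payload[:sent_message_count]}/#{payload[:message_count]} messages"
    end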
data/lib/kafka/producer.rb
@@ -0,0 +1,528 @@
+ # frozen_string_literal: true
+
+ require "set"
+ require "kafka/partitioner"
+ require "kafka/message_buffer"
+ require "kafka/produce_operation"
+ require "kafka/pending_message_queue"
+ require "kafka/pending_message"
+ require "kafka/compressor"
+ require "kafka/interceptors"
+
+ module Kafka
+   # Allows sending messages to a Kafka cluster.
+   #
+   # Typically you won't instantiate this class yourself, but rather have {Kafka::Client}
+   # do it for you, e.g.
+   #
+   #     # Will instantiate Kafka::Client
+   #     kafka = Kafka.new(["kafka1:9092", "kafka2:9092"])
+   #
+   #     # Will instantiate Kafka::Producer
+   #     producer = kafka.producer
+   #
+   # This is done in order to share a logger as well as a pool of broker connections across
+   # different producers. This also means that you don't need to pass the `cluster` and
+   # `logger` options to `#producer`. See {#initialize} for the list of other options
+   # you can pass in.
+   #
+   # ## Buffering
+   #
+   # The producer buffers pending messages until {#deliver_messages} is called. Note that there is
+   # a maximum buffer size (default is 1,000 messages) and writing messages after the
+   # buffer has reached this size will result in a BufferOverflow exception. Make sure
+   # to periodically call {#deliver_messages} or set `max_buffer_size` to an appropriate value.
+   #
+   # Buffering messages and sending them in batches greatly improves performance, so
+   # try to avoid sending messages after every write. The tradeoff between throughput and
+   # message delays depends on your use case.
+   #
+   # ## Error Handling and Retries
+   #
+   # The design of the error handling is based on having a {MessageBuffer} hold messages
+   # for all topics/partitions. Whenever we want to send messages to the cluster, we
+   # group the buffered messages by the broker they need to be sent to and fire off a
+   # request to each broker. A request can be a partial success, so we go through the
+   # response and inspect the error code for each partition that we wrote to. If the
+   # write to a given partition was successful, we clear the corresponding messages
+   # from the buffer -- otherwise, we log the error and keep the messages in the buffer.
+   #
+   # After this, we check if the buffer is empty. If it is, we're all done. If it's
+   # not, we do another round of requests, this time with just the remaining messages.
+   # We do this for as long as `max_retries` permits.
+   #
+   # ## Compression
+   #
+   # Depending on what kind of data you produce, enabling compression may yield improved
+   # bandwidth and space usage. Compression in Kafka is done on entire message sets
+   # rather than on individual messages. This improves the compression rate and generally
+   # means that compression works better the larger your buffers get, since the message
+   # sets will be larger by the time they're compressed.
+   #
+   # Since many workloads have variations in throughput and distribution across partitions,
+   # it's possible to configure a threshold for when to enable compression by setting
+   # `compression_threshold`. Only if the defined number of messages are buffered for a
+   # partition will the messages be compressed.
+   #
+   # Compression is enabled by passing the `compression_codec` parameter with the
+   # name of one of the algorithms allowed by Kafka:
+   #
+   # * `:snappy` for [Snappy](http://google.github.io/snappy/) compression.
+   # * `:gzip` for [gzip](https://en.wikipedia.org/wiki/Gzip) compression.
+   # * `:lz4` for [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) compression.
+   # * `:zstd` for [zstd](https://facebook.github.io/zstd/) compression.
+   #
+   # By default, all message sets will be compressed if you specify a compression
+   # codec. To increase the compression threshold, set `compression_threshold` to
+   # an integer value higher than one.
+   #
+   # ## Instrumentation
+   #
+   # Whenever {#produce} is called, the notification `produce_message.producer.kafka`
+   # will be emitted with the following payload:
+   #
+   # * `value` – the message value.
+   # * `key` – the message key.
+   # * `topic` – the topic that was produced to.
+   # * `buffer_size` – the buffer size after adding the message.
+   # * `max_buffer_size` – the maximum allowed buffer size for the producer.
+   #
+   # After {#deliver_messages} completes, the notification
+   # `deliver_messages.producer.kafka` will be emitted with the following payload:
+   #
+   # * `message_count` – the total number of messages that the producer tried to
+   #   deliver. Note that not all messages may get delivered.
+   # * `delivered_message_count` – the number of messages that were successfully
+   #   delivered.
+   # * `attempts` – the number of attempts made to deliver the messages.
+   #
+   # ## Example
+   #
+   # This is an example of an application which reads lines from stdin and writes them
+   # to Kafka:
+   #
+   #     require "kafka"
+   #
+   #     logger = Logger.new($stderr)
+   #     brokers = ENV.fetch("KAFKA_BROKERS").split(",")
+   #
+   #     # Make sure to create this topic in your Kafka cluster or configure the
+   #     # cluster to auto-create topics.
+   #     topic = "random-messages"
+   #
+   #     kafka = Kafka.new(brokers, client_id: "simple-producer", logger: logger)
+   #     producer = kafka.producer
+   #
+   #     begin
+   #       $stdin.each_with_index do |line, index|
+   #         producer.produce(line, topic: topic)
+   #
+   #         # Deliver the buffered messages every 10 lines.
+   #         producer.deliver_messages if index % 10 == 0
+   #       end
+   #     ensure
+   #       # Make sure to deliver any remaining messages.
+   #       producer.deliver_messages
+   #
+   #       producer.shutdown
+   #     end
+   #
+   class Producer
+     class AbortTransaction < StandardError; end
+
+     def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:,
+                    required_acks:, max_retries:, retry_backoff:, max_buffer_size:,
+                    max_buffer_bytesize:, partitioner:, interceptors: [])
+       @cluster = cluster
+       @transaction_manager = transaction_manager
+       @logger = TaggedLogger.new(logger)
+       @instrumenter = instrumenter
+       @required_acks = required_acks == :all ? -1 : required_acks
+       @ack_timeout = ack_timeout
+       @max_retries = max_retries
+       @retry_backoff = retry_backoff
+       @max_buffer_size = max_buffer_size
+       @max_buffer_bytesize = max_buffer_bytesize
+       @compressor = compressor
+       @partitioner = partitioner
+       @interceptors = Interceptors.new(interceptors: interceptors, logger: logger)
+
+       # The set of topics that are produced to.
+       @target_topics = Set.new
+
+       # A buffer organized by topic/partition.
+       @buffer = MessageBuffer.new
+
+       # Messages added by `#produce` but not yet assigned a partition.
+       @pending_message_queue = PendingMessageQueue.new
+     end
+
+     def to_s
+       "Producer #{@target_topics.to_a.join(', ')}"
+     end
+
+     # Produces a message to the specified topic. Note that messages are buffered in
+     # the producer until {#deliver_messages} is called.
+     #
+     # ## Partitioning
+     #
+     # There are several options for specifying the partition that the message should
+     # be written to.
+     #
+     # The simplest option is to not specify a message key, partition key, or
+     # partition number, in which case the message will be assigned a partition at
+     # random.
+     #
+     # You can also specify the `partition` parameter yourself. This requires you to
+     # know which partitions are available, however. Oftentimes the best option is
+     # to specify the `partition_key` parameter: messages with the same partition
+     # key will always be assigned to the same partition, as long as the number of
+     # partitions doesn't change. You can also omit the partition key and specify
+     # a message key instead. The message key is part of the message payload, and
+     # so can carry semantic value; whether you want to have the message key double
+     # as a partition key is up to you.
+     #
+     # @param value [String] the message data.
+     # @param key [String] the message key.
+     # @param headers [Hash<String, String>] the headers for the message.
+     # @param topic [String] the topic that the message should be written to.
+     # @param partition [Integer] the partition that the message should be written to.
+     # @param partition_key [String] the key that should be used to assign a partition.
+     # @param create_time [Time] the timestamp that should be set on the message.
+     #
+     # @raise [BufferOverflow] if the maximum buffer size has been reached.
+     # @return [nil]
+     def produce(value, key: nil, headers: {}, topic:, partition: nil, partition_key: nil, create_time: Time.now)
+       # We want to fail fast if `topic` isn't a String
+       topic = topic.to_str
+
+       message = @interceptors.call(PendingMessage.new(
+         value: value && value.to_s,
+         key: key && key.to_s,
+         headers: headers,
+         topic: topic,
+         partition: partition && Integer(partition),
+         partition_key: partition_key && partition_key.to_s,
+         create_time: create_time
+       ))
+
+       if buffer_size >= @max_buffer_size
+         buffer_overflow topic,
+           "Cannot produce to #{topic}, max buffer size (#{@max_buffer_size} messages) reached"
+       end
+
+       if buffer_bytesize + message.bytesize >= @max_buffer_bytesize
+         buffer_overflow topic,
+           "Cannot produce to #{topic}, max buffer bytesize (#{@max_buffer_bytesize} bytes) reached"
+       end
+
+       # In transactional mode, messages may only be produced while a
+       # transaction is in progress.
+       if @transaction_manager.transactional? && !@transaction_manager.in_transaction?
+         raise "Cannot produce to #{topic}: You must trigger begin_transaction before producing messages"
+       end
+
+       @target_topics.add(topic)
+       @pending_message_queue.write(message)
+
+       @instrumenter.instrument("produce_message.producer", {
+         value: value,
+         key: key,
+         topic: topic,
+         create_time: create_time,
+         message_size: message.bytesize,
+         buffer_size: buffer_size,
+         max_buffer_size: @max_buffer_size,
+       })
+
+       nil
+     end
+
+     # Sends all buffered messages to the Kafka brokers.
+     #
+     # Depending on the value of `required_acks` used when initializing the producer,
+     # this call may block until the specified number of replicas have acknowledged
+     # the writes. The `ack_timeout` setting places an upper bound on the amount of
+     # time the call will block before failing.
+     #
+     # @raise [DeliveryFailed] if not all messages could be successfully sent.
+     # @return [nil]
+     def deliver_messages
+       # There's no need to do anything if the buffer is empty.
+       return if buffer_size == 0
+
+       @instrumenter.instrument("deliver_messages.producer") do |notification|
+         message_count = buffer_size
+
+         notification[:message_count] = message_count
+         notification[:attempts] = 0
+
+         begin
+           deliver_messages_with_retries(notification)
+         ensure
+           notification[:delivered_message_count] = message_count - buffer_size
+         end
+       end
+     end
+
+     # Returns the number of messages currently held in the buffer.
+     #
+     # @return [Integer] buffer size.
+     def buffer_size
+       @pending_message_queue.size + @buffer.size
+     end
+
+     def buffer_bytesize
+       @pending_message_queue.bytesize + @buffer.bytesize
+     end
+
+     # Deletes all buffered messages.
+     #
+     # @return [nil]
+     def clear_buffer
+       @buffer.clear
+       @pending_message_queue.clear
+     end
+
+     # Closes all connections to the brokers.
+     #
+     # @return [nil]
+     def shutdown
+       @transaction_manager.close
+       @cluster.disconnect
+     end
+
+     # Initializes the producer so it is ready for future transactions. This method
+     # should be called once, before any transactions are started.
+     #
+     # @return [nil]
+     def init_transactions
+       @transaction_manager.init_transactions
+     end
+
+     # Marks the beginning of a transaction, transitioning the transaction state
+     # to IN_TRANSACTION.
+     #
+     # Produce operations can only be executed while the transaction is in this
+     # state. The records are persisted by the Kafka brokers, but are not visible
+     # to consumers until #commit_transaction is called. If the transaction is not
+     # committed within the timeout period, it times out and is considered aborted.
+     #
+     # @return [nil]
+     def begin_transaction
+       @transaction_manager.begin_transaction
+     end
+
+     # Commits the pending transaction, marking all the produced records as
+     # committed. After that, they are visible to consumers.
+     #
+     # This method can be called if and only if the current transaction is in the
+     # IN_TRANSACTION state.
+     #
+     # @return [nil]
+     def commit_transaction
+       @transaction_manager.commit_transaction
+     end
+
+     # Aborts the pending transaction, marking all the produced records as
+     # aborted. The records will be discarded by the brokers, and consumers won't
+     # see them unless they opt in to reading uncommitted messages.
+     #
+     # This method can be called if and only if the current transaction is in the
+     # IN_TRANSACTION state.
+     #
+     # @return [nil]
+     def abort_transaction
+       @transaction_manager.abort_transaction
+     end
+
+     # Sends the batch's last offset to the consumer group coordinator, and also marks
+     # this offset as part of the current transaction. This offset will be considered
+     # committed only if the transaction is committed successfully.
+     #
+     # This method should be used when you need to batch consumed and produced messages
+     # together, typically in a consume-transform-produce pattern. Thus, the specified
+     # group_id should be the same as the group_id config parameter of the
+     # consumer in use.
+     #
+     # @return [nil]
+     def send_offsets_to_transaction(batch:, group_id:)
+       @transaction_manager.send_offsets_to_txn(offsets: { batch.topic => { batch.partition => { offset: batch.last_offset + 1, leader_epoch: batch.leader_epoch } } }, group_id: group_id)
+     end
+
+     # Syntactic sugar to enable easier transaction usage. Performs the following steps:
+     #
+     # - Start the transaction (with Producer#begin_transaction)
+     # - Yield the given block
+     # - Commit the transaction (with Producer#commit_transaction)
+     #
+     # If the block raises an exception, the transaction is automatically aborted
+     # *before* the exception bubbles up.
+     #
+     # If the block raises the Kafka::Producer::AbortTransaction indicator exception,
+     # the transaction is aborted silently, without re-raising that exception.
+     #
+     # @return [nil]
+     def transaction
+       raise 'This method requires a block' unless block_given?
+       begin_transaction
+       yield
+       commit_transaction
+     rescue Kafka::Producer::AbortTransaction
+       abort_transaction
+     rescue
+       abort_transaction
+       raise
+     end
+
+     private
+
+     def deliver_messages_with_retries(notification)
+       attempt = 0
+
+       @cluster.add_target_topics(@target_topics)
+
+       operation = ProduceOperation.new(
+         cluster: @cluster,
+         transaction_manager: @transaction_manager,
+         buffer: @buffer,
+         required_acks: @required_acks,
+         ack_timeout: @ack_timeout,
+         compressor: @compressor,
+         logger: @logger,
+         instrumenter: @instrumenter,
+       )
+
+       loop do
+         attempt += 1
+
+         notification[:attempts] = attempt
+
+         begin
+           @cluster.refresh_metadata_if_necessary!
+         rescue ConnectionError => e
+           raise DeliveryFailed.new(e, buffer_messages)
+         end
+
+         assign_partitions!
+         operation.execute
+
+         if @required_acks.zero?
+           # No response is returned by the brokers, so we can't know which messages
+           # have been successfully written. Our only option is to assume that they all
+           # have.
+           @buffer.clear
+         end
+
+         if buffer_size.zero?
+           break
+         elsif attempt <= @max_retries
+           @logger.warn "Failed to send all messages to #{pretty_partitions}; attempting retry #{attempt} of #{@max_retries} after #{@retry_backoff}s"
+
+           sleep @retry_backoff
+         else
+           @logger.error "Failed to send all messages to #{pretty_partitions}; keeping remaining messages in buffer"
+           break
+         end
+       end
+
+       unless @pending_message_queue.empty?
+         # Mark the cluster as stale in order to force a cluster metadata refresh.
+         @cluster.mark_as_stale!
+         raise DeliveryFailed.new("Failed to assign partitions to #{@pending_message_queue.size} messages", buffer_messages)
+       end
+
+       unless @buffer.empty?
+         raise DeliveryFailed.new("Failed to send messages to #{pretty_partitions}", buffer_messages)
+       end
+     end
+
+     def pretty_partitions
+       @buffer.map {|topic, partition, _| "#{topic}/#{partition}" }.join(", ")
+     end
+
+     def assign_partitions!
+       failed_messages = []
+       topics_with_failures = Set.new
+
+       @pending_message_queue.each do |message|
+         partition = message.partition
+
+         begin
+           # If a message for a topic fails to receive a partition, all subsequent
+           # messages for the topic should be retried to preserve ordering.
+           if topics_with_failures.include?(message.topic)
+             failed_messages << message
+             next
+           end
+
+           if partition.nil?
+             partition_count = @cluster.partitions_for(message.topic).count
+             partition = @partitioner.call(partition_count, message)
+           end
+
+           @buffer.write(
+             value: message.value,
+             key: message.key,
+             headers: message.headers,
+             topic: message.topic,
+             partition: partition,
+             create_time: message.create_time,
+           )
+         rescue Kafka::Error => e
+           @instrumenter.instrument("topic_error.producer", {
+             topic: message.topic,
+             exception: [e.class.to_s, e.message],
+           })
+
+           topics_with_failures << message.topic
+           failed_messages << message
+         end
+       end
+
+       if failed_messages.any?
+         failed_messages.group_by(&:topic).each do |topic, messages|
+           @logger.error "Failed to assign partitions to #{messages.count} messages in #{topic}"
+         end
+
+         @cluster.mark_as_stale!
+       end
+
+       @pending_message_queue.replace(failed_messages)
+     end
+
+     def buffer_messages
+       messages = []
+
+       @pending_message_queue.each do |message|
+         messages << message
+       end
+
+       @buffer.each do |topic, partition, messages_for_partition|
+         messages_for_partition.each do |message|
+           messages << PendingMessage.new(
+             value: message.value,
+             key: message.key,
+             headers: message.headers,
+             topic: topic,
+             partition: partition,
+             partition_key: nil,
+             create_time: message.create_time
+           )
+         end
+       end
+
+       messages
+     end
+
+     def buffer_overflow(topic, message)
+       @instrumenter.instrument("buffer_overflow.producer", {
+         topic: topic,
+       })
+
+       raise BufferOverflow, message
+     end
+   end
+ end
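Putting the transactional API together, here is a sketch of the consume-transform-produce pattern. The topic names and transactional id are hypothetical, and the `transactional:`/`transactional_id:` options to Kafka::Client#producer are an assumption based on how this gem's Client configures transactional producers:

    require "kafka"

    kafka = Kafka.new(["kafka1:9092"], client_id: "txn-example")

    # Assumed producer options; a non-transactional producer would raise
    # when begin_transaction is called.
    producer = kafka.producer(transactional: true, transactional_id: "checkout-1")

    producer.init_transactions # once, before the first transaction

    producer.transaction do
      producer.produce("order shipped", topic: "shipping-events", partition_key: "order-42")
      producer.deliver_messages

      # Raising Kafka::Producer::AbortTransaction here aborts silently;
      # any other exception aborts the transaction and then re-raises.
    end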