ruby-kafka-temp-fork 0.0.1

Files changed (144)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +393 -0
  3. data/.github/workflows/stale.yml +19 -0
  4. data/.gitignore +13 -0
  5. data/.readygo +1 -0
  6. data/.rspec +3 -0
  7. data/.rubocop.yml +44 -0
  8. data/.ruby-version +1 -0
  9. data/.yardopts +3 -0
  10. data/CHANGELOG.md +310 -0
  11. data/Gemfile +5 -0
  12. data/ISSUE_TEMPLATE.md +23 -0
  13. data/LICENSE.txt +176 -0
  14. data/Procfile +2 -0
  15. data/README.md +1342 -0
  16. data/Rakefile +8 -0
  17. data/benchmarks/message_encoding.rb +23 -0
  18. data/bin/console +8 -0
  19. data/bin/setup +5 -0
  20. data/docker-compose.yml +39 -0
  21. data/examples/consumer-group.rb +35 -0
  22. data/examples/firehose-consumer.rb +64 -0
  23. data/examples/firehose-producer.rb +54 -0
  24. data/examples/simple-consumer.rb +34 -0
  25. data/examples/simple-producer.rb +42 -0
  26. data/examples/ssl-producer.rb +44 -0
  27. data/lib/kafka.rb +373 -0
  28. data/lib/kafka/async_producer.rb +291 -0
  29. data/lib/kafka/broker.rb +217 -0
  30. data/lib/kafka/broker_info.rb +16 -0
  31. data/lib/kafka/broker_pool.rb +41 -0
  32. data/lib/kafka/broker_uri.rb +43 -0
  33. data/lib/kafka/client.rb +833 -0
  34. data/lib/kafka/cluster.rb +513 -0
  35. data/lib/kafka/compression.rb +45 -0
  36. data/lib/kafka/compressor.rb +86 -0
  37. data/lib/kafka/connection.rb +223 -0
  38. data/lib/kafka/connection_builder.rb +33 -0
  39. data/lib/kafka/consumer.rb +642 -0
  40. data/lib/kafka/consumer_group.rb +231 -0
  41. data/lib/kafka/consumer_group/assignor.rb +63 -0
  42. data/lib/kafka/crc32_hash.rb +15 -0
  43. data/lib/kafka/datadog.rb +420 -0
  44. data/lib/kafka/digest.rb +22 -0
  45. data/lib/kafka/fetch_operation.rb +115 -0
  46. data/lib/kafka/fetched_batch.rb +58 -0
  47. data/lib/kafka/fetched_batch_generator.rb +120 -0
  48. data/lib/kafka/fetched_message.rb +48 -0
  49. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  50. data/lib/kafka/fetcher.rb +224 -0
  51. data/lib/kafka/gzip_codec.rb +34 -0
  52. data/lib/kafka/heartbeat.rb +25 -0
  53. data/lib/kafka/instrumenter.rb +38 -0
  54. data/lib/kafka/interceptors.rb +33 -0
  55. data/lib/kafka/lz4_codec.rb +27 -0
  56. data/lib/kafka/message_buffer.rb +87 -0
  57. data/lib/kafka/murmur2_hash.rb +17 -0
  58. data/lib/kafka/offset_manager.rb +259 -0
  59. data/lib/kafka/partitioner.rb +40 -0
  60. data/lib/kafka/pause.rb +92 -0
  61. data/lib/kafka/pending_message.rb +29 -0
  62. data/lib/kafka/pending_message_queue.rb +41 -0
  63. data/lib/kafka/produce_operation.rb +205 -0
  64. data/lib/kafka/producer.rb +528 -0
  65. data/lib/kafka/prometheus.rb +316 -0
  66. data/lib/kafka/protocol.rb +225 -0
  67. data/lib/kafka/protocol/add_offsets_to_txn_request.rb +29 -0
  68. data/lib/kafka/protocol/add_offsets_to_txn_response.rb +21 -0
  69. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  70. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  71. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  72. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  73. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  74. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  75. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  76. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  77. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  78. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  79. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  80. data/lib/kafka/protocol/decoder.rb +175 -0
  81. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  82. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  83. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  84. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  85. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  86. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  87. data/lib/kafka/protocol/encoder.rb +184 -0
  88. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  89. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  90. data/lib/kafka/protocol/fetch_request.rb +70 -0
  91. data/lib/kafka/protocol/fetch_response.rb +136 -0
  92. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  93. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  94. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  95. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  96. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  97. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  98. data/lib/kafka/protocol/join_group_request.rb +47 -0
  99. data/lib/kafka/protocol/join_group_response.rb +41 -0
  100. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  101. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  102. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  103. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  104. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  105. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  106. data/lib/kafka/protocol/member_assignment.rb +42 -0
  107. data/lib/kafka/protocol/message.rb +172 -0
  108. data/lib/kafka/protocol/message_set.rb +55 -0
  109. data/lib/kafka/protocol/metadata_request.rb +31 -0
  110. data/lib/kafka/protocol/metadata_response.rb +185 -0
  111. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  112. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  113. data/lib/kafka/protocol/offset_fetch_request.rb +38 -0
  114. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  115. data/lib/kafka/protocol/produce_request.rb +94 -0
  116. data/lib/kafka/protocol/produce_response.rb +63 -0
  117. data/lib/kafka/protocol/record.rb +88 -0
  118. data/lib/kafka/protocol/record_batch.rb +223 -0
  119. data/lib/kafka/protocol/request_message.rb +26 -0
  120. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  121. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  122. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  123. data/lib/kafka/protocol/sync_group_response.rb +26 -0
  124. data/lib/kafka/protocol/txn_offset_commit_request.rb +46 -0
  125. data/lib/kafka/protocol/txn_offset_commit_response.rb +47 -0
  126. data/lib/kafka/round_robin_assignment_strategy.rb +52 -0
  127. data/lib/kafka/sasl/gssapi.rb +76 -0
  128. data/lib/kafka/sasl/oauth.rb +64 -0
  129. data/lib/kafka/sasl/plain.rb +39 -0
  130. data/lib/kafka/sasl/scram.rb +180 -0
  131. data/lib/kafka/sasl_authenticator.rb +61 -0
  132. data/lib/kafka/snappy_codec.rb +29 -0
  133. data/lib/kafka/socket_with_timeout.rb +96 -0
  134. data/lib/kafka/ssl_context.rb +66 -0
  135. data/lib/kafka/ssl_socket_with_timeout.rb +188 -0
  136. data/lib/kafka/statsd.rb +296 -0
  137. data/lib/kafka/tagged_logger.rb +77 -0
  138. data/lib/kafka/transaction_manager.rb +306 -0
  139. data/lib/kafka/transaction_state_machine.rb +72 -0
  140. data/lib/kafka/version.rb +5 -0
  141. data/lib/kafka/zstd_codec.rb +27 -0
  142. data/lib/ruby-kafka-temp-fork.rb +5 -0
  143. data/ruby-kafka-temp-fork.gemspec +54 -0
  144. metadata +520 -0
data/lib/kafka/compression.rb
@@ -0,0 +1,45 @@
+ # frozen_string_literal: true
+
+ require "kafka/snappy_codec"
+ require "kafka/gzip_codec"
+ require "kafka/lz4_codec"
+ require "kafka/zstd_codec"
+
+ module Kafka
+   module Compression
+     CODECS_BY_NAME = {
+       :gzip => GzipCodec.new,
+       :snappy => SnappyCodec.new,
+       :lz4 => LZ4Codec.new,
+       :zstd => ZstdCodec.new,
+     }.freeze
+
+     CODECS_BY_ID = CODECS_BY_NAME.each_with_object({}) do |(_, codec), hash|
+       hash[codec.codec_id] = codec
+     end.freeze
+
+     def self.codecs
+       CODECS_BY_NAME.keys
+     end
+
+     def self.find_codec(name)
+       codec = CODECS_BY_NAME.fetch(name) do
+         raise "Unknown compression codec #{name}"
+       end
+
+       codec.load
+
+       codec
+     end
+
+     def self.find_codec_by_id(codec_id)
+       codec = CODECS_BY_ID.fetch(codec_id) do
+         raise "Unknown codec id #{codec_id}"
+       end
+
+       codec.load
+
+       codec
+     end
+   end
+ end
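
The registry above gives symmetric lookups: by symbolic name and by the one-byte codec id Kafka uses on the wire. A rough usage sketch (assuming the gem is bundled and the codec's underlying library, such as zlib for gzip, is available):

    require "kafka/compression"

    # Look a codec up by name; find_codec lazily loads its library dependency.
    codec = Kafka::Compression.find_codec(:gzip)

    # The same frozen registry backs the id lookup, so both return one instance.
    # (1 is gzip's compression id in the Kafka protocol.)
    Kafka::Compression.find_codec_by_id(codec.codec_id).equal?(codec) # => true

    # Unknown names raise, mirroring the fetch blocks above.
    Kafka::Compression.find_codec(:brotli) # => RuntimeError: Unknown compression codec brotli
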
data/lib/kafka/compressor.rb
@@ -0,0 +1,86 @@
+ # frozen_string_literal: true
+
+ require "kafka/compression"
+
+ module Kafka
+
+   # Compresses message sets using a specified codec.
+   #
+   # A message set is only compressed if its size meets the defined threshold.
+   #
+   # ## Instrumentation
+   #
+   # Whenever a message set is compressed, the notification
+   # `compress.compressor.kafka` will be emitted with the following payload:
+   #
+   # * `message_count` – the number of messages in the message set.
+   # * `uncompressed_bytesize` – the byte size of the original data.
+   # * `compressed_bytesize` – the byte size of the compressed data.
+   #
+   class Compressor
+     attr_reader :codec
+
+     # @param codec_name [Symbol, nil]
+     # @param threshold [Integer] the minimum number of messages in a message set
+     #   that will trigger compression.
+     def initialize(codec_name: nil, threshold: 1, instrumenter:)
+       # Codec may be nil, in which case we won't compress.
+       @codec = codec_name && Compression.find_codec(codec_name)
+
+       @threshold = threshold
+       @instrumenter = instrumenter
+     end
+
+     # @param record_batch [Protocol::RecordBatch]
+     # @param offset [Integer] used to simulate broker behaviour in tests
+     # @return [String, Protocol::MessageSet] the encoded record batch, or the
+     #   (possibly compressed) message set when given the deprecated format.
+     def compress(record_batch, offset: -1)
+       if record_batch.is_a?(Protocol::RecordBatch)
+         compress_record_batch(record_batch)
+       else
+         # Deprecated message set format
+         compress_message_set(record_batch, offset)
+       end
+     end
+
+     private
+
+     def compress_message_set(message_set, offset)
+       return message_set if @codec.nil? || message_set.size < @threshold
+
+       data = Protocol::Encoder.encode_with(message_set)
+       compressed_data = @codec.compress(data)
+
+       @instrumenter.instrument("compress.compressor") do |notification|
+         notification[:message_count] = message_set.size
+         notification[:uncompressed_bytesize] = data.bytesize
+         notification[:compressed_bytesize] = compressed_data.bytesize
+       end
+
+       wrapper_message = Protocol::Message.new(
+         value: compressed_data,
+         codec_id: @codec.codec_id,
+         offset: offset
+       )
+
+       Protocol::MessageSet.new(messages: [wrapper_message])
+     end
+
+     def compress_record_batch(record_batch)
+       if @codec.nil? || record_batch.size < @threshold
+         record_batch.codec_id = 0
+         return Protocol::Encoder.encode_with(record_batch)
+       end
+
+       record_batch.codec_id = @codec.codec_id
+       data = Protocol::Encoder.encode_with(record_batch)
+
+       @instrumenter.instrument("compress.compressor") do |notification|
+         notification[:message_count] = record_batch.size
+         notification[:compressed_bytesize] = data.bytesize
+       end
+
+       data
+     end
+   end
+ end
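
Because the gem's instrumenter namespaces events under `kafka`, the `compress.compressor.kafka` notification documented above can be observed with ActiveSupport::Notifications. A minimal sketch, assuming the activesupport gem is loaded; note that, per the code above, `uncompressed_bytesize` is only populated on the legacy message set path:

    require "active_support/notifications"

    ActiveSupport::Notifications.subscribe("compress.compressor.kafka") do |*, payload|
      # The record batch path sets :message_count and :compressed_bytesize only.
      puts "compressed #{payload[:message_count]} messages " \
           "into #{payload[:compressed_bytesize]} bytes"
    end
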
data/lib/kafka/connection.rb
@@ -0,0 +1,223 @@
+ # frozen_string_literal: true
+
+ require "stringio"
+ require "kafka/socket_with_timeout"
+ require "kafka/ssl_socket_with_timeout"
+ require "kafka/protocol/request_message"
+ require "kafka/protocol/encoder"
+ require "kafka/protocol/decoder"
+
+ module Kafka
+
+   # A connection to a single Kafka broker.
+   #
+   # Usually you'll need a separate connection to each broker in a cluster, since most
+   # requests must be directed specifically to the broker that is currently leader for
+   # the set of topic partitions you want to produce to or consume from.
+   #
+   # ## Instrumentation
+   #
+   # Connections emit a `request.connection.kafka` notification on each request. The following
+   # keys will be found in the payload:
+   #
+   # * `:broker_host` — the hostname of the broker.
+   # * `:api` — the name of the API being invoked.
+   # * `:request_size` — the number of bytes in the request.
+   # * `:response_size` — the number of bytes in the response.
+   #
+   # The notification also includes the duration of the request.
+   #
+   class Connection
+     SOCKET_TIMEOUT = 10
+     CONNECT_TIMEOUT = 10
+
+     # Time after which an idle connection will be reopened.
+     IDLE_TIMEOUT = 60 * 5
+
+     attr_reader :encoder
+     attr_reader :decoder
+
+     # Opens a connection to a Kafka broker.
+     #
+     # @param host [String] the hostname of the broker.
+     # @param port [Integer] the port of the broker.
+     # @param client_id [String] the client id is a user-specified string sent in each
+     #   request to help trace calls and should logically identify the application
+     #   making the request.
+     # @param logger [Logger] the logger used to log trace messages.
+     # @param connect_timeout [Integer] the socket timeout for connecting to the broker.
+     #   Default is 10 seconds.
+     # @param socket_timeout [Integer] the socket timeout for reading and writing to the
+     #   broker. Default is 10 seconds.
+     #
+     # @return [Connection] a new connection.
+     def initialize(host:, port:, client_id:, logger:, instrumenter:, connect_timeout: nil, socket_timeout: nil, ssl_context: nil)
+       @host, @port, @client_id = host, port, client_id
+       @logger = TaggedLogger.new(logger)
+       @instrumenter = instrumenter
+
+       @connect_timeout = connect_timeout || CONNECT_TIMEOUT
+       @socket_timeout = socket_timeout || SOCKET_TIMEOUT
+       @ssl_context = ssl_context
+
+       @socket = nil
+       @last_request = nil
+     end
+
+     def to_s
+       "#{@host}:#{@port}"
+     end
+
+     def open?
+       !@socket.nil? && !@socket.closed?
+     end
+
+     def close
+       @logger.debug "Closing socket to #{to_s}"
+
+       @socket.close if @socket
+     end
+
+     # Sends a request over the connection.
+     #
+     # @param request [#encode, #response_class] the request that should be
+     #   encoded and written.
+     #
+     # @return [Object] the response.
+     def send_request(request)
+       api_name = Protocol.api_name(request.api_key)
+
+       # Default notification payload.
+       notification = {
+         broker_host: @host,
+         api: api_name,
+         request_size: 0,
+         response_size: 0,
+       }
+
+       raise IdleConnection if idle?
+
+       @logger.push_tags(api_name)
+       @instrumenter.instrument("request.connection", notification) do
+         open unless open?
+
+         @correlation_id += 1
+
+         @logger.debug "Sending #{api_name} API request #{@correlation_id} to #{to_s}"
+
+         write_request(request, notification)
+
+         response_class = request.response_class
+         response = wait_for_response(response_class, notification) unless response_class.nil?
+
+         @last_request = Time.now
+
+         response
+       end
+     rescue SystemCallError, EOFError, IOError => e
+       close
+
+       raise ConnectionError, "Connection error #{e.class}: #{e}"
+     ensure
+       @logger.pop_tags
+     end
+
+     private
+
+     def open
+       @logger.debug "Opening connection to #{@host}:#{@port} with client id #{@client_id}..."
+
+       if @ssl_context
+         @socket = SSLSocketWithTimeout.new(@host, @port, connect_timeout: @connect_timeout, timeout: @socket_timeout, ssl_context: @ssl_context)
+       else
+         @socket = SocketWithTimeout.new(@host, @port, connect_timeout: @connect_timeout, timeout: @socket_timeout)
+       end
+
+       @encoder = Kafka::Protocol::Encoder.new(@socket)
+       @decoder = Kafka::Protocol::Decoder.new(@socket)
+
+       # Correlation id is initialized to zero and bumped for each request.
+       @correlation_id = 0
+
+       @last_request = nil
+     rescue Errno::ETIMEDOUT => e
+       @logger.error "Timed out while trying to connect to #{self}: #{e}"
+       raise ConnectionError, e
+     rescue SocketError, Errno::ECONNREFUSED, Errno::EHOSTUNREACH => e
+       @logger.error "Failed to connect to #{self}: #{e}"
+       raise ConnectionError, e
+     end
+
+     def idle?
+       @last_request && @last_request < Time.now - IDLE_TIMEOUT
+     end
+
+     # Writes a request over the connection.
+     #
+     # @param request [#encode] the request that should be encoded and written.
+     #
+     # @return [nil]
+     def write_request(request, notification)
+       message = Kafka::Protocol::RequestMessage.new(
+         api_key: request.api_key,
+         api_version: request.respond_to?(:api_version) ? request.api_version : 0,
+         correlation_id: @correlation_id,
+         client_id: @client_id,
+         request: request,
+       )
+
+       data = Kafka::Protocol::Encoder.encode_with(message)
+       notification[:request_size] = data.bytesize
+
+       @encoder.write_bytes(data)
+
+       nil
+     rescue Errno::ETIMEDOUT
+       @logger.error "Timed out while writing request #{@correlation_id}"
+       raise
+     end
+
+     # Reads a response from the connection.
+     #
+     # @param response_class [#decode] an object that can decode the response from
+     #   a given Decoder.
+     #
+     # @return [Array(Integer, Object)] the correlation id and the decoded response.
+     def read_response(response_class, notification)
+       @logger.debug "Waiting for response #{@correlation_id} from #{to_s}"
+
+       data = @decoder.bytes
+       notification[:response_size] = data.bytesize
+
+       buffer = StringIO.new(data)
+       response_decoder = Kafka::Protocol::Decoder.new(buffer)
+
+       correlation_id = response_decoder.int32
+       response = response_class.decode(response_decoder)
+
+       @logger.debug "Received response #{correlation_id} from #{to_s}"
+
+       return correlation_id, response
+     rescue Errno::ETIMEDOUT
+       @logger.error "Timed out while waiting for response #{@correlation_id}"
+       raise
+     end
+
+     def wait_for_response(response_class, notification)
+       loop do
+         correlation_id, response = read_response(response_class, notification)
+
+         # There may have been a previous request that timed out before the client
+         # was able to read the response. In that case, the response will still be
+         # sitting in the socket waiting to be read. If the response we just read
+         # was to a previous request, we can safely skip it.
+         if correlation_id < @correlation_id
+           @logger.error "Received out-of-order response id #{correlation_id}, was expecting #{@correlation_id}"
+         elsif correlation_id > @correlation_id
+           raise Kafka::Error, "Correlation id mismatch: expected #{@correlation_id} but got #{correlation_id}"
+         else
+           return response
+         end
+       end
+     end
+   end
+ end
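
The correlation-id bookkeeping in `wait_for_response` leans on Kafka's wire framing: each request and response travels as a 4-byte big-endian length prefix followed by the payload, and a response payload starts with the int32 correlation id of the request it answers. A standalone illustration of that framing in plain Ruby (a sketch, not code from this gem; correlation ids are signed int32 but non-negative here, so "N" suffices):

    # Prefix a payload with its length, as Encoder#write_bytes does.
    def frame(payload)
      [payload.bytesize].pack("N") + payload
    end

    # Read one frame and peel off the leading correlation id, as
    # read_response does via Decoder#bytes and Decoder#int32.
    def read_frame(io)
      size = io.read(4).unpack1("N")
      payload = io.read(size)
      correlation_id = payload.unpack1("N")
      [correlation_id, payload]
    end
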
data/lib/kafka/connection_builder.rb
@@ -0,0 +1,33 @@
+ # frozen_string_literal: true
+
+ module Kafka
+   class ConnectionBuilder
+     def initialize(client_id:, logger:, instrumenter:, connect_timeout:, socket_timeout:, ssl_context:, sasl_authenticator:)
+       @client_id = client_id
+       @logger = TaggedLogger.new(logger)
+       @instrumenter = instrumenter
+       @connect_timeout = connect_timeout
+       @socket_timeout = socket_timeout
+       @ssl_context = ssl_context
+       @sasl_authenticator = sasl_authenticator
+     end
+
+     def build_connection(host, port)
+       connection = Connection.new(
+         host: host,
+         port: port,
+         client_id: @client_id,
+         connect_timeout: @connect_timeout,
+         socket_timeout: @socket_timeout,
+         logger: @logger,
+         instrumenter: @instrumenter,
+         ssl_context: @ssl_context,
+       )
+
+       @sasl_authenticator.authenticate!(connection)
+
+       connection
+     end
+
+   end
+ end
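
For completeness, a sketch of driving the builder directly; normally Kafka::Client does this wiring. The no-op SASL object is a stand-in for an unconfigured SaslAuthenticator, and the Kafka::Instrumenter.new(client_id:) call mirrors how the client builds its instrumenter (both are assumptions about this fork's internals):

    require "logger"

    # Stand-in double: authenticate! is a no-op when no SASL mechanism is used.
    null_sasl = Object.new
    def null_sasl.authenticate!(_connection); end

    builder = Kafka::ConnectionBuilder.new(
      client_id: "my-app",
      logger: Logger.new($stdout),
      instrumenter: Kafka::Instrumenter.new(client_id: "my-app"),
      connect_timeout: 10,
      socket_timeout: 10,
      ssl_context: nil,
      sasl_authenticator: null_sasl,
    )

    connection = builder.build_connection("kafka1", 9092) # placeholder address
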
data/lib/kafka/consumer.rb
@@ -0,0 +1,642 @@
+ # frozen_string_literal: true
+
+ require "kafka/consumer_group"
+ require "kafka/interceptors"
+ require "kafka/offset_manager"
+ require "kafka/fetcher"
+ require "kafka/pause"
+
+ module Kafka
+
+   # A client that consumes messages from a Kafka cluster in coordination with
+   # other clients.
+   #
+   # A Consumer subscribes to one or more Kafka topics; all consumers with the
+   # same *group id* then agree on who should read from the individual topic
+   # partitions. When group members join or leave, the group synchronizes,
+   # making sure that each partition is assigned to a single member, and that
+   # all members have some partitions to read from.
+   #
+   # ## Example
+   #
+   # A simple consumer that writes the messages it consumes to the console.
+   #
+   #     require "kafka"
+   #
+   #     kafka = Kafka.new(["kafka1:9092", "kafka2:9092"])
+   #
+   #     # Create a new Consumer instance in the group `my-group`:
+   #     consumer = kafka.consumer(group_id: "my-group")
+   #
+   #     # Subscribe to a Kafka topic:
+   #     consumer.subscribe("messages")
+   #
+   #     # Loop forever, reading in messages from all topics that have been
+   #     # subscribed to.
+   #     consumer.each_message do |message|
+   #       puts message.topic
+   #       puts message.partition
+   #       puts message.key
+   #       puts message.headers
+   #       puts message.value
+   #       puts message.offset
+   #     end
+   #
+   class Consumer
+
+     def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:,
+                    session_timeout:, heartbeat:, refresh_topic_interval: 0, interceptors: [])
+       @cluster = cluster
+       @logger = TaggedLogger.new(logger)
+       @instrumenter = instrumenter
+       @group = group
+       @offset_manager = offset_manager
+       @session_timeout = session_timeout
+       @fetcher = fetcher
+       @heartbeat = heartbeat
+       @refresh_topic_interval = refresh_topic_interval
+       @interceptors = Interceptors.new(interceptors: interceptors, logger: logger)
+
+       @pauses = Hash.new {|h, k|
+         h[k] = Hash.new {|h2, k2|
+           h2[k2] = Pause.new
+         }
+       }
+
+       # Whether or not the consumer is currently consuming messages.
+       @running = false
+
+       # Hash containing offsets for each topic and partition that has the
+       # automatically_mark_as_processed feature disabled. The offset manager is only
+       # active when everything is supposed to happen automatically; otherwise we need
+       # to keep track of the offset manually in memory the whole time.
+       # Keyed by topic, then partition; the value is the offset of the last message
+       # we've received.
+       # @note It won't be updated when the user marks a message as processed, because
+       #   if the user commits a message other than the last one in a batch, that would
+       #   make ruby-kafka refetch some already consumed messages
+       @current_offsets = Hash.new { |h, k| h[k] = {} }
+
+       # Map storing subscribed topics with their configuration
+       @subscribed_topics = Hash.new
+
+       # Set storing topics that matched topics in @subscribed_topics
+       @matched_topics = Set.new
+
+       # Whether join_group must be executed again because new topics are added
+       @join_group_for_new_topics = false
+     end
+
+     # Subscribes the consumer to a topic.
+     #
+     # Typically you either want to start reading messages from the very
+     # beginning of the topic's partitions or you simply want to wait for new
+     # messages to be written. In the former case, set `start_from_beginning`
+     # to true (the default); in the latter, set it to false.
+     #
+     # @param topic_or_regex [String, Regexp] subscribe to single topic with a string
+     #   or multiple topics matching a regex.
+     # @param default_offset [Symbol] whether to start from the beginning or the
+     #   end of the topic's partitions. Deprecated.
+     # @param start_from_beginning [Boolean] whether to start from the beginning
+     #   of the topic or just subscribe to new messages being produced. This
+     #   only applies when first consuming a topic partition – once the consumer
+     #   has checkpointed its progress, it will always resume from the last
+     #   checkpoint.
+     # @param max_bytes_per_partition [Integer] the maximum amount of data fetched
+     #   from a single partition at a time.
+     # @return [nil]
+     def subscribe(topic_or_regex, default_offset: nil, start_from_beginning: true, max_bytes_per_partition: 1048576)
+       default_offset ||= start_from_beginning ? :earliest : :latest
+
+       @subscribed_topics[topic_or_regex] = {
+         default_offset: default_offset,
+         start_from_beginning: start_from_beginning,
+         max_bytes_per_partition: max_bytes_per_partition
+       }
+       scan_for_subscribing
+
+       nil
+     end
+
+     # Stop the consumer.
+     #
+     # The consumer will finish any in-progress work and shut down.
+     #
+     # @return [nil]
+     def stop
+       @running = false
+       @fetcher.stop
+     end
+
+     # Pause processing of a specific topic partition.
+     #
+     # When a specific message causes the processor code to fail, it can be a good
+     # idea to simply pause the partition until the error can be resolved, allowing
+     # the rest of the partitions to continue being processed.
+     #
+     # If the `timeout` argument is passed, the partition will automatically be
+     # resumed when the timeout expires. If `exponential_backoff` is enabled, each
+     # subsequent pause will cause the timeout to double until a message from the
+     # partition has been successfully processed.
+     #
+     # @param topic [String]
+     # @param partition [Integer]
+     # @param timeout [nil, Integer] the number of seconds to pause the partition for,
+     #   or `nil` if the partition should not be automatically resumed.
+     # @param max_timeout [nil, Integer] the maximum number of seconds to pause for,
+     #   or `nil` if no maximum should be enforced.
+     # @param exponential_backoff [Boolean] whether to enable exponential backoff.
+     # @return [nil]
+     def pause(topic, partition, timeout: nil, max_timeout: nil, exponential_backoff: false)
+       if max_timeout && !exponential_backoff
+         raise ArgumentError, "`max_timeout` only makes sense when `exponential_backoff` is enabled"
+       end
+
+       pause_for(topic, partition).pause!(
+         timeout: timeout,
+         max_timeout: max_timeout,
+         exponential_backoff: exponential_backoff,
+       )
+     end
+
+     # Resume processing of a topic partition.
+     #
+     # @see #pause
+     # @param topic [String]
+     # @param partition [Integer]
+     # @return [nil]
+     def resume(topic, partition)
+       pause_for(topic, partition).resume!
+
+       # During re-balancing we might have lost the paused partition. Check if partition is still in group before seek.
+       seek_to_next(topic, partition) if @group.assigned_to?(topic, partition)
+     end
+
+     # Whether the topic partition is currently paused.
+     #
+     # @see #pause
+     # @param topic [String]
+     # @param partition [Integer]
+     # @return [Boolean] true if the partition is paused, false otherwise.
+     def paused?(topic, partition)
+       pause = pause_for(topic, partition)
+       pause.paused? && !pause.expired?
+     end
+
+     # Fetches and enumerates the messages in the topics that the consumer group
+     # subscribes to.
+     #
+     # Each message is yielded to the provided block. If the block returns
+     # without raising an exception, the message will be considered successfully
+     # processed. At regular intervals the offset of the most recent successfully
+     # processed message in each partition will be committed to the Kafka
+     # offset store. If the consumer crashes or leaves the group, the group member
+     # that is tasked with taking over processing of these partitions will resume
+     # at the last committed offsets.
+     #
+     # @param min_bytes [Integer] the minimum number of bytes to read before
+     #   returning messages from each broker; if `max_wait_time` is reached, this
+     #   is ignored.
+     # @param max_bytes [Integer] the maximum number of bytes to read before
+     #   returning messages from each broker.
+     # @param max_wait_time [Integer, Float] the maximum duration of time to wait before
+     #   returning messages from each broker, in seconds.
+     # @param automatically_mark_as_processed [Boolean] whether to automatically
+     #   mark a message as successfully processed when the block returns
+     #   without an exception. Once marked successful, the offsets of processed
+     #   messages can be committed to Kafka.
+     # @yieldparam message [Kafka::FetchedMessage] a message fetched from Kafka.
+     # @raise [Kafka::ProcessingError] if there was an error processing a message.
+     #   The original exception will be returned by calling `#cause` on the
+     #   {Kafka::ProcessingError} instance.
+     # @return [nil]
+     def each_message(min_bytes: 1, max_bytes: 10485760, max_wait_time: 1, automatically_mark_as_processed: true)
+       @fetcher.configure(
+         min_bytes: min_bytes,
+         max_bytes: max_bytes,
+         max_wait_time: max_wait_time,
+       )
+
+       consumer_loop do
+         batches = fetch_batches
+
+         batches.each do |batch|
+           batch = @interceptors.call(batch)
+           batch.messages.each do |message|
+             notification = {
+               topic: message.topic,
+               partition: message.partition,
+               offset: message.offset,
+               offset_lag: batch.highwater_mark_offset - message.offset - 1,
+               create_time: message.create_time,
+               key: message.key,
+               value: message.value,
+               headers: message.headers
+             }
+
+             # Instrument an event immediately so that subscribers don't have to wait until
+             # the block is completed.
+             @instrumenter.instrument("start_process_message.consumer", notification)
+
+             @instrumenter.instrument("process_message.consumer", notification) do
+               begin
+                 yield message unless message.is_control_record
+                 @current_offsets[message.topic][message.partition] = message.offset
+               rescue => e
+                 location = "#{message.topic}/#{message.partition} at offset #{message.offset}"
+                 backtrace = e.backtrace.join("\n")
+                 @logger.error "Exception raised when processing #{location} -- #{e.class}: #{e}\n#{backtrace}"
+
+                 raise ProcessingError.new(message.topic, message.partition, message.offset)
+               end
+             end
+
+             mark_message_as_processed(message) if automatically_mark_as_processed
+             @offset_manager.commit_offsets_if_necessary
+
+             trigger_heartbeat
+
+             return if shutting_down?
+           end
+
+           # We've successfully processed a batch from the partition, so we can clear
+           # the pause.
+           pause_for(batch.topic, batch.partition).reset!
+         end
+
+         # We may not have received any messages, but it's still a good idea to
+         # commit offsets if we've processed messages in the last set of batches.
+         # This also ensures the offsets are retained if we haven't read any messages
+         # since the offset retention period has elapsed.
+         @offset_manager.commit_offsets_if_necessary
+       end
+     end
+
+     # Fetches and enumerates the messages in the topics that the consumer group
+     # subscribes to.
+     #
+     # Each batch of messages is yielded to the provided block. If the block returns
+     # without raising an exception, the batch will be considered successfully
+     # processed. At regular intervals the offset of the most recent successfully
+     # processed message batch in each partition will be committed to the Kafka
+     # offset store. If the consumer crashes or leaves the group, the group member
+     # that is tasked with taking over processing of these partitions will resume
+     # at the last committed offsets.
+     #
+     # @param min_bytes [Integer] the minimum number of bytes to read before
+     #   returning messages from each broker; if `max_wait_time` is reached, this
+     #   is ignored.
+     # @param max_bytes [Integer] the maximum number of bytes to read before
+     #   returning messages from each broker.
+     # @param max_wait_time [Integer, Float] the maximum duration of time to wait before
+     #   returning messages from each broker, in seconds.
+     # @param automatically_mark_as_processed [Boolean] whether to automatically
+     #   mark a batch's messages as successfully processed when the block returns
+     #   without an exception. Once marked successful, the offsets of processed
+     #   messages can be committed to Kafka.
+     # @yieldparam batch [Kafka::FetchedBatch] a message batch fetched from Kafka.
+     # @raise [Kafka::ProcessingError] if there was an error processing a batch.
+     #   The original exception will be returned by calling `#cause` on the
+     #   {Kafka::ProcessingError} instance.
+     # @return [nil]
+     def each_batch(min_bytes: 1, max_bytes: 10485760, max_wait_time: 1, automatically_mark_as_processed: true)
+       @fetcher.configure(
+         min_bytes: min_bytes,
+         max_bytes: max_bytes,
+         max_wait_time: max_wait_time,
+       )
+
+       consumer_loop do
+         batches = fetch_batches
+
+         batches.each do |batch|
+           unless batch.empty?
+             raw_messages = batch.messages
+             batch.messages = raw_messages.reject(&:is_control_record)
+             batch = @interceptors.call(batch)
+
+             notification = {
+               topic: batch.topic,
+               partition: batch.partition,
+               last_offset: batch.last_offset,
+               last_create_time: batch.messages.last && batch.messages.last.create_time,
+               offset_lag: batch.offset_lag,
+               highwater_mark_offset: batch.highwater_mark_offset,
+               message_count: batch.messages.count,
+             }
+
+             # Instrument an event immediately so that subscribers don't have to wait until
+             # the block is completed.
+             @instrumenter.instrument("start_process_batch.consumer", notification)
+
+             @instrumenter.instrument("process_batch.consumer", notification) do
+               begin
+                 yield batch
+                 @current_offsets[batch.topic][batch.partition] = batch.last_offset unless batch.unknown_last_offset?
+               rescue => e
+                 offset_range = (batch.first_offset..batch.last_offset || batch.highwater_mark_offset)
+                 location = "#{batch.topic}/#{batch.partition} in offset range #{offset_range}"
+                 backtrace = e.backtrace.join("\n")
+
+                 @logger.error "Exception raised when processing #{location} -- #{e.class}: #{e}\n#{backtrace}"
+
+                 raise ProcessingError.new(batch.topic, batch.partition, offset_range)
+               ensure
+                 batch.messages = raw_messages
+               end
+             end
+             mark_message_as_processed(batch.messages.last) if automatically_mark_as_processed
+
+             # We've successfully processed a batch from the partition, so we can clear
+             # the pause.
+             pause_for(batch.topic, batch.partition).reset!
+           end
+
+           @offset_manager.commit_offsets_if_necessary
+
+           trigger_heartbeat
+
+           return if shutting_down?
+         end
+
+         # We may not have received any messages, but it's still a good idea to
+         # commit offsets if we've processed messages in the last set of batches.
+         # This also ensures the offsets are retained if we haven't read any messages
+         # since the offset retention period has elapsed.
+         @offset_manager.commit_offsets_if_necessary
+       end
+     end
+
+     # Move the consumer's position in a topic partition to the specified offset.
+     #
+     # Note that this has to be done prior to calling {#each_message} or {#each_batch}
+     # and only has an effect if the consumer is assigned the partition. Typically,
+     # you will want to do this in every consumer group member in order to make sure
+     # that the member that's assigned the partition knows where to start.
+     #
+     # @param topic [String]
+     # @param partition [Integer]
+     # @param offset [Integer]
+     # @return [nil]
+     def seek(topic, partition, offset)
+       @offset_manager.seek_to(topic, partition, offset)
+     end
+
+     def commit_offsets
+       @offset_manager.commit_offsets
+     end
+
+     def mark_message_as_processed(message)
+       @offset_manager.mark_as_processed(message.topic, message.partition, message.offset)
+     end
+
+     def trigger_heartbeat
+       @heartbeat.trigger
+     end
+
+     def trigger_heartbeat!
+       @heartbeat.trigger!
+     end
+
+     # Aliases for external API compatibility
+     alias send_heartbeat_if_necessary trigger_heartbeat
+     alias send_heartbeat trigger_heartbeat!
+
+     private
+
+     def consumer_loop
+       @running = true
+       @logger.push_tags(@group.to_s)
+
+       @fetcher.start
+
+       while running?
+         begin
+           @instrumenter.instrument("loop.consumer") do
+             refresh_topic_list_if_enabled
+             yield
+           end
+         rescue HeartbeatError
+           make_final_offsets_commit!
+           join_group if running?
+         rescue OffsetCommitError
+           join_group if running?
+         rescue RebalanceInProgress
+           @logger.warn "Group rebalance in progress, re-joining..."
+           join_group if running?
+         rescue FetchError, NotLeaderForPartition, UnknownTopicOrPartition
+           @cluster.mark_as_stale!
+         rescue LeaderNotAvailable => e
+           @logger.error "Leader not available; waiting 1s before retrying"
+           @cluster.mark_as_stale!
+           sleep 1
+         rescue ConnectionError => e
+           @logger.error "Connection error #{e.class}: #{e.message}"
+           @cluster.mark_as_stale!
+         rescue SignalException => e
+           @logger.warn "Received signal #{e.message}, shutting down"
+           @running = false
+         end
+       end
+     ensure
+       @fetcher.stop
+
+       # In order to quickly have the consumer group re-balance itself, it's
+       # important that members explicitly tell Kafka when they're leaving.
+       make_final_offsets_commit!
+       @group.leave rescue nil
+       @cluster.disconnect
+       @running = false
+       @logger.pop_tags
+     end
+
+     def make_final_offsets_commit!(attempts = 3)
+       @offset_manager.commit_offsets
+     rescue ConnectionError, OffsetCommitError, EOFError
+       # It's important to make sure the final offsets commit is done, as otherwise
+       # messages that have been processed after the last auto-commit will be
+       # processed again, and that may be a huge number of messages.
+       return if attempts.zero?
+
+       @logger.error "Retrying to make final offsets commit (#{attempts} attempts left)"
+       sleep(0.1)
+       make_final_offsets_commit!(attempts - 1)
+     rescue Kafka::Error => e
+       @logger.error "Encountered error while shutting down; #{e.class}: #{e.message}"
+     end
+
+     def join_group
+       @join_group_for_new_topics = false
+
+       old_generation_id = @group.generation_id
+
+       @group.join
+
+       if old_generation_id && @group.generation_id != old_generation_id + 1
+         # We've been out of the group for at least an entire generation, no
+         # sense in trying to hold on to offset data
+         clear_current_offsets
+         @offset_manager.clear_offsets
+       else
+         # After rejoining the group we may have been assigned a new set of
+         # partitions. Keeping the old offset commits around forever would risk
+         # having the consumer go back and reprocess messages if it's assigned
+         # a partition it used to be assigned to way back. For that reason, we
+         # only keep commits for the partitions that we're still assigned.
+         clear_current_offsets(excluding: @group.assigned_partitions)
+         @offset_manager.clear_offsets_excluding(@group.assigned_partitions)
+       end
+
+       @fetcher.reset
+
+       @group.assigned_partitions.each do |topic, partitions|
+         partitions.each do |partition|
+           if paused?(topic, partition)
+             @logger.warn "Not fetching from #{topic}/#{partition} due to pause"
+           else
+             seek_to_next(topic, partition)
+           end
+         end
+       end
+     end
+
+     def seek_to_next(topic, partition)
+       # When automatic marking is off, the first poll needs to be based on the last
+       # committed offset from Kafka; that's why we fall back in case of nil (it may not be 0)
+       if @current_offsets[topic].key?(partition)
+         offset = @current_offsets[topic][partition] + 1
+       else
+         offset = @offset_manager.next_offset_for(topic, partition)
+       end
+
+       @fetcher.seek(topic, partition, offset)
+     end
+
+     def resume_paused_partitions!
+       @pauses.each do |topic, partitions|
+         partitions.each do |partition, pause|
+           @instrumenter.instrument("pause_status.consumer", {
+             topic: topic,
+             partition: partition,
+             duration: pause.pause_duration,
+           })
+
+           if pause.paused? && pause.expired?
+             @logger.info "Automatically resuming partition #{topic}/#{partition}, pause timeout expired"
+             resume(topic, partition)
+           end
+         end
+       end
+     end
+
+     def refresh_topic_list_if_enabled
+       return if @refresh_topic_interval <= 0
+       return if @refreshed_at && @refreshed_at + @refresh_topic_interval > Time.now
+
+       scan_for_subscribing
+       @refreshed_at = Time.now
+     end
+
+     def fetch_batches
+       # Return early if the consumer has been stopped.
+       return [] if shutting_down?
+
+       join_group if !@group.member? || @join_group_for_new_topics
+
+       trigger_heartbeat
+
+       resume_paused_partitions!
+
+       if !@fetcher.data?
+         @logger.debug "No batches to process"
+         sleep(@fetcher.max_wait_time || 2)
+         []
+       else
+         tag, message = @fetcher.poll
+
+         case tag
+         when :batches
+           # Make sure any old batches, fetched prior to the completion of a consumer
+           # group sync, are only processed if they come from partitions this consumer
+           # is still assigned to.
+           message.select { |batch| @group.assigned_to?(batch.topic, batch.partition) }
+         when :exception
+           raise message
+         end
+       end
+     rescue OffsetOutOfRange => e
+       @logger.error "Invalid offset #{e.offset} for #{e.topic}/#{e.partition}, resetting to default offset"
+
+       @offset_manager.seek_to_default(e.topic, e.partition)
+
+       retry
+     rescue ConnectionError => e
+       @logger.error "Connection error while fetching messages: #{e}"
+
+       raise FetchError, e
+     end
+
+     def pause_for(topic, partition)
+       @pauses[topic][partition]
+     end
+
+     def running?
+       @running
+     end
+
+     def shutting_down?
+       !running?
+     end
+
+     def clear_current_offsets(excluding: {})
+       @current_offsets.each do |topic, partitions|
+         partitions.keep_if do |partition, _|
+           excluding.fetch(topic, []).include?(partition)
+         end
+       end
+     end
+
+     def scan_for_subscribing
+       @subscribed_topics.each do |topic_or_regex, config|
+         default_offset = config.fetch(:default_offset)
+         start_from_beginning = config.fetch(:start_from_beginning)
+         max_bytes_per_partition = config.fetch(:max_bytes_per_partition)
+         if topic_or_regex.is_a?(Regexp)
+           subscribe_to_regex(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+         else
+           subscribe_to_topic(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+         end
+       end
+     end
+
+     def subscribe_to_regex(topic_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+       cluster_topics.select { |topic| topic =~ topic_regex }.each do |topic|
+         subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
+       end
+     end
+
+     def subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
+       return if @matched_topics.include?(topic)
+       @matched_topics.add(topic)
+       @join_group_for_new_topics = true
+
+       @group.subscribe(topic)
+       @offset_manager.set_default_offset(topic, default_offset)
+       @fetcher.subscribe(topic, max_bytes_per_partition: max_bytes_per_partition)
+       @cluster.mark_as_stale!
+     end
+
+     def cluster_topics
+       attempts = 0
+       begin
+         attempts += 1
+         @cluster.list_topics
+       rescue Kafka::ConnectionError
+         @cluster.mark_as_stale!
+         retry unless attempts > 1
+         raise
+       end
+     end
+   end
+ end
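
Tying the consumer API above together, a sketch of a processing loop that combines manual offset marking with the pause API when a partition turns poisonous (broker addresses, the topic name, and the process method are placeholders):

    require "kafka"

    kafka = Kafka.new(["kafka1:9092", "kafka2:9092"])
    consumer = kafka.consumer(group_id: "my-group")
    consumer.subscribe("messages")

    consumer.each_message(automatically_mark_as_processed: false) do |message|
      begin
        process(message) # hypothetical application code
        consumer.mark_message_as_processed(message)
      rescue
        # Rescuing inside the block keeps each_message running; back off just
        # this partition, doubling the timeout up to ten minutes per #pause.
        consumer.pause(message.topic, message.partition,
                       timeout: 30, max_timeout: 600, exponential_backoff: true)
      end
    end
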