racecar 0.5.0.beta2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/racecar/consumer.rb CHANGED
@@ -1,11 +1,13 @@
+# frozen_string_literal: true
+
 module Racecar
   class Consumer
-    Subscription = Struct.new(:topic, :start_from_beginning, :max_bytes_per_partition)
+    Subscription = Struct.new(:topic, :start_from_beginning, :max_bytes_per_partition, :additional_config)
 
     class << self
       attr_accessor :max_wait_time
       attr_accessor :group_id
-      attr_accessor :offset_retention_time
+      attr_accessor :producer, :consumer
 
       def subscriptions
         @subscriptions ||= []
@@ -20,29 +22,68 @@ module Racecar
       # of each partition.
       # @param max_bytes_per_partition [Integer] the maximum number of bytes to fetch from
       #   each partition at a time.
+      # @param additional_config [Hash] Configuration properties for consumer.
+      #   See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
       # @return [nil]
-      def subscribes_to(*topics, start_from_beginning: true, max_bytes_per_partition: 1048576)
+      def subscribes_to(*topics, start_from_beginning: true, max_bytes_per_partition: 1048576, additional_config: {})
         topics.each do |topic|
-          subscriptions << Subscription.new(topic, start_from_beginning, max_bytes_per_partition)
+          subscriptions << Subscription.new(topic, start_from_beginning, max_bytes_per_partition, additional_config)
         end
       end
     end
 
-    def configure(consumer:, producer:)
-      @_consumer = consumer
-      @_producer = producer
+    def configure(producer:, consumer:, instrumenter: NullInstrumenter)
+      @producer = producer
+      @consumer = consumer
+      @instrumenter = instrumenter
     end
 
     def teardown; end
 
+    # Delivers messages that got produced.
+    def deliver!
+      @delivery_handles ||= []
+      if @delivery_handles.any?
+        instrumentation_payload = { delivered_message_count: @delivery_handles.size }
+
+        @instrumenter.instrument('deliver_messages', instrumentation_payload) do
+          @delivery_handles.each(&:wait)
+        end
+      end
+      @delivery_handles.clear
+    end
+
     protected
 
-    def heartbeat
-      @_consumer.trigger_heartbeat
+    # https://github.com/appsignal/rdkafka-ruby#producing-messages
+    def produce(payload, topic:, key: nil, partition_key: nil, headers: nil, create_time: nil)
+      @delivery_handles ||= []
+      message_size = payload.respond_to?(:bytesize) ? payload.bytesize : 0
+      instrumentation_payload = {
+        value: payload,
+        headers: headers,
+        key: key,
+        partition_key: partition_key,
+        topic: topic,
+        message_size: message_size,
+        create_time: Time.now,
+        buffer_size: @delivery_handles.size,
+      }
+
+      @instrumenter.instrument("produce_message", instrumentation_payload) do
+        @delivery_handles << @producer.produce(
+          topic: topic,
+          payload: payload,
+          key: key,
+          partition_key: partition_key,
+          timestamp: create_time,
+          headers: headers,
+        )
+      end
     end
 
-    def produce(value, **options)
-      @_producer.produce(value, **options)
+    def heartbeat
+      warn "DEPRECATION WARNING: Manual heartbeats are not supported and not needed with librdkafka."
     end
   end
 end
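
Taken together, the Consumer changes swap ruby-kafka's synchronous produce call for rdkafka-style delivery handles: `produce` only buffers a handle, and `deliver!` blocks until every buffered handle has been acknowledged. A minimal sketch of a consumer against the new API; the class name, topic names, and config values are hypothetical:

    class GreetingsConsumer < Racecar::Consumer
      # Per-subscription librdkafka properties ride along via additional_config.
      subscribes_to "greetings", start_from_beginning: false,
        additional_config: { "fetch.error.backoff.ms" => 500 }

      def process(message)
        # produce buffers a delivery handle; deliver! waits on all of them.
        produce("Hello, #{message.value}!", topic: "replies", key: message.key)
        deliver!
      end
    end
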
data/lib/racecar/consumer_set.rb ADDED
@@ -0,0 +1,239 @@
+# frozen_string_literal: true
+
+module Racecar
+  class ConsumerSet
+    MAX_POLL_TRIES = 10
+
+    def initialize(config, logger, instrumenter = NullInstrumenter)
+      @config, @logger = config, logger
+      @instrumenter = instrumenter
+      raise ArgumentError, "Subscriptions must not be empty when subscribing" if @config.subscriptions.empty?
+
+      @consumers = []
+      @consumer_id_iterator = (0...@config.subscriptions.size).cycle
+
+      @previous_retries = 0
+
+      @last_poll_read_nil_message = false
+    end
+
+    def poll(max_wait_time_ms = @config.max_wait_time_ms)
+      batch_poll(max_wait_time_ms, 1).first
+    end
+
+    # batch_poll collects messages until any of the following occurs:
+    # - max_wait_time_ms time has passed
+    # - max_messages have been collected
+    # - a nil message was polled (end of topic, Kafka stalled, etc.)
+    #
+    # The messages are from a single topic, but potentially from more than one partition.
+    #
+    # Any errors during polling are retried in an exponential backoff fashion. If an error
+    # occurs, but there is no time left for a backoff and retry, it will return the
+    # already collected messages and only retry on the next call.
+    def batch_poll(max_wait_time_ms = @config.max_wait_time_ms, max_messages = @config.fetch_messages)
+      started_at = Time.now
+      remain_ms = max_wait_time_ms
+      maybe_select_next_consumer
+      messages = []
+
+      while remain_ms > 0 && messages.size < max_messages
+        remain_ms = remaining_time_ms(max_wait_time_ms, started_at)
+        msg = poll_with_retries(remain_ms)
+        break if msg.nil?
+        messages << msg
+      end
+
+      messages
+    end
+
+    def store_offset(message)
+      current.store_offset(message)
+    end
+
+    def commit
+      each_subscribed do |consumer|
+        commit_rescue_no_offset(consumer)
+      end
+    end
+
+    def close
+      each_subscribed(&:close)
+    end
+
+    def current
+      @consumers[@consumer_id_iterator.peek] ||= begin
+        consumer = Rdkafka::Config.new(rdkafka_config(current_subscription)).consumer
+        @instrumenter.instrument('join_group') do
+          consumer.subscribe current_subscription.topic
+        end
+        consumer
+      end
+    end
+
+    def each_subscribed
+      if block_given?
+        @consumers.each { |c| yield c }
+      else
+        @consumers.each
+      end
+    end
+
+    def pause(topic, partition, offset)
+      consumer, filtered_tpl = find_consumer_by(topic, partition)
+      if !consumer
+        @logger.info "Attempted to pause #{topic}/#{partition}, but we're not subscribed to it"
+        return
+      end
+
+      consumer.pause(filtered_tpl)
+      fake_msg = OpenStruct.new(topic: topic, partition: partition, offset: offset)
+      consumer.seek(fake_msg)
+    end
+
+    def resume(topic, partition)
+      consumer, filtered_tpl = find_consumer_by(topic, partition)
+      if !consumer
+        @logger.info "Attempted to resume #{topic}/#{partition}, but we're not subscribed to it"
+        return
+      end
+
+      consumer.resume(filtered_tpl)
+    end
+
+    alias :each :each_subscribed
+
+    # Subscribe to all topics eagerly, even if there's still messages elsewhere. Usually
+    # that's not needed and Kafka might rebalance if topics are not polled frequently
+    # enough.
+    def subscribe_all
+      @config.subscriptions.size.times do
+        current
+        select_next_consumer
+      end
+    end
+
+    private
+
+    # polls a single message from the current consumer, retrying errors with exponential
+    # backoff. The sleep time is capped by max_wait_time_ms. If there's enough time budget
+    # left, it will retry before returning. If there isn't, the retry will only occur on
+    # the next call. It tries up to MAX_POLL_TRIES before passing on the exception.
+    def poll_with_retries(max_wait_time_ms)
+      try ||= @previous_retries
+      @previous_retries = 0
+      started_at ||= Time.now
+      remain_ms = remaining_time_ms(max_wait_time_ms, started_at)
+
+      wait_ms = try == 0 ? 0 : 50 * (2**try) # 0ms, 100ms, 200ms, 400ms, …
+      if wait_ms >= max_wait_time_ms && remain_ms > 1
+        @logger.debug "Capping #{wait_ms}ms to #{max_wait_time_ms-1}ms."
+        sleep (max_wait_time_ms-1)/1000.0
+        remain_ms = 1
+      elsif try == 0 && remain_ms == 0
+        @logger.debug "No time remains for polling messages. Will try on next call."
+        return nil
+      elsif wait_ms >= remain_ms
+        @logger.error "Only #{remain_ms}ms left, but want to wait for #{wait_ms}ms before poll. Will retry on next call."
+        @previous_retries = try
+        return nil
+      elsif wait_ms > 0
+        sleep wait_ms/1000.0
+        remain_ms -= wait_ms
+      end
+
+      poll_current_consumer(remain_ms)
+    rescue Rdkafka::RdkafkaError => e
+      try += 1
+      @instrumenter.instrument("poll_retry", try: try, rdkafka_time_limit: remain_ms, exception: e)
+      @logger.error "(try #{try}/#{MAX_POLL_TRIES}): Error for topic subscription #{current_subscription}: #{e}"
+      raise if try >= MAX_POLL_TRIES
+      retry
+    end
+
+    # polls a message for the current consumer, handling any API edge cases.
+    def poll_current_consumer(max_wait_time_ms)
+      msg = current.poll(max_wait_time_ms)
+    rescue Rdkafka::RdkafkaError => e
+      case e.code
+      when :max_poll_exceeded, :transport # -147, -195
+        reset_current_consumer
+      end
+      raise
+    ensure
+      @last_poll_read_nil_message = msg.nil?
+    end
+
+    def find_consumer_by(topic, partition)
+      each do |consumer|
+        tpl = consumer.assignment.to_h
+        rdkafka_partition = tpl[topic]&.detect { |part| part.partition == partition }
+        next unless rdkafka_partition
+        filtered_tpl = Rdkafka::Consumer::TopicPartitionList.new({ topic => [rdkafka_partition] })
+        return consumer, filtered_tpl
+      end
+
+      return nil, nil
+    end
+
+    def current_subscription
+      @config.subscriptions[@consumer_id_iterator.peek]
+    end
+
+    def reset_current_consumer
+      current_consumer_id = @consumer_id_iterator.peek
+      @logger.info "Resetting consumer with id: #{current_consumer_id}"
+
+      consumer = @consumers[current_consumer_id]
+      consumer.close unless consumer.nil?
+      @consumers[current_consumer_id] = nil
+    end
+
+    def maybe_select_next_consumer
+      return unless @last_poll_read_nil_message
+      @last_poll_read_nil_message = false
+      select_next_consumer
+    end
+
+    def select_next_consumer
+      @consumer_id_iterator.next
+    end
+
+    def commit_rescue_no_offset(consumer)
+      consumer.commit(nil, !@config.synchronous_commits)
+    rescue Rdkafka::RdkafkaError => e
+      raise e if e.code != :no_offset
+      @logger.debug "Nothing to commit."
+    end
+
+    def rdkafka_config(subscription)
+      # https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
+      config = {
+        "auto.commit.interval.ms" => @config.offset_commit_interval * 1000,
+        "auto.offset.reset" => subscription.start_from_beginning ? "earliest" : "largest",
+        "bootstrap.servers" => @config.brokers.join(","),
+        "client.id" => @config.client_id,
+        "enable.partition.eof" => false,
+        "fetch.max.bytes" => @config.max_bytes,
+        "message.max.bytes" => subscription.max_bytes_per_partition,
+        "fetch.min.bytes" => @config.fetch_min_bytes,
+        "fetch.wait.max.ms" => @config.max_wait_time_ms,
+        "group.id" => @config.group_id,
+        "heartbeat.interval.ms" => @config.heartbeat_interval * 1000,
+        "max.poll.interval.ms" => @config.max_poll_interval * 1000,
+        "queued.min.messages" => @config.min_message_queue_size,
+        "session.timeout.ms" => @config.session_timeout * 1000,
+        "socket.timeout.ms" => @config.socket_timeout * 1000,
+        "statistics.interval.ms" => 1000, # 1s is the highest granularity offered
+      }
+      config.merge! @config.rdkafka_consumer
+      config.merge! subscription.additional_config
+      config
+    end
+
+    def remaining_time_ms(limit_ms, started_at_time)
+      r = limit_ms - ((Time.now - started_at_time)*1000).round
+      r <= 0 ? 0 : r
+    end
+  end
+end
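
The retry logic in poll_with_retries backs off by 50 * (2**try) milliseconds, capped by the remaining time budget. A quick illustration of the schedule that formula produces (plain arithmetic, no Racecar required):

    (0..4).each do |try|
      wait_ms = try.zero? ? 0 : 50 * (2**try)
      puts "try #{try}: wait #{wait_ms}ms"
    end
    # try 0: wait 0ms
    # try 1: wait 100ms
    # try 2: wait 200ms
    # try 3: wait 400ms
    # try 4: wait 800ms

By the ninth retry the uncapped wait would be 50 * 2**9 = 25600ms, which is why the first branch clamps the sleep to max_wait_time_ms - 1 and why a retry can be deferred to the next call.
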
data/lib/racecar/ctl.rb CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require "optparse"
 require "racecar/rails_config_file_loader"
 require "racecar/daemon"
@@ -93,15 +95,13 @@ module Racecar
 
       Racecar.config.validate!
 
-      kafka = Kafka.new(
-        client_id: Racecar.config.client_id,
-        seed_brokers: Racecar.config.brokers,
-        logger: Racecar.logger,
-        connect_timeout: Racecar.config.connect_timeout,
-        socket_timeout: Racecar.config.socket_timeout,
-      )
+      producer = Rdkafka::Config.new({
+        "bootstrap.servers": Racecar.config.brokers.join(","),
+        "client.id": Racecar.config.client_id,
+      }.merge(Racecar.config.rdkafka_producer)).producer
 
-      kafka.deliver_message(message.value, key: message.key, topic: message.topic)
+      handle = producer.produce(payload: message.value, key: message.key, topic: message.topic)
+      handle.wait(max_wait_timeout: 5)
 
       $stderr.puts "=> Delivered message to Kafka cluster"
     end
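
Note the shape change here: ruby-kafka's deliver_message was synchronous, while rdkafka's produce returns a delivery handle immediately, so the handle.wait(max_wait_timeout: 5) call is what restores the blocking behavior. A sketch of the timeout path, assuming rdkafka-ruby >= 0.8 (where wait raises Rdkafka::AbstractHandle::WaitTimeoutError); the topic and payload are illustrative:

    handle = producer.produce(topic: "greetings", payload: "hello")
    begin
      handle.wait(max_wait_timeout: 5)
    rescue Rdkafka::AbstractHandle::WaitTimeoutError
      # No broker acknowledgment within 5s. The message may still arrive
      # later, so blindly re-producing here risks duplicates.
    end
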
data/lib/racecar/daemon.rb CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 module Racecar
   class Daemon
     attr_reader :pidfile
data/lib/racecar/datadog.rb ADDED
@@ -0,0 +1,247 @@
+# frozen_string_literal: true
+
+begin
+  require "datadog/statsd"
+rescue LoadError
+  $stderr.puts "In order to report Kafka client metrics to Datadog you need to install the `dogstatsd-ruby` gem."
+  raise
+end
+
+require "active_support/subscriber"
+
+module Racecar
+  module Datadog
+    STATSD_NAMESPACE = "racecar"
+
+    class << self
+      def configure
+        yield self
+      end
+
+      def statsd
+        @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags)
+      end
+
+      def statsd=(statsd)
+        clear
+        @statsd = statsd
+      end
+
+      def host
+        @host
+      end
+
+      def host=(host)
+        @host = host
+        clear
+      end
+
+      def port
+        @port
+      end
+
+      def port=(port)
+        @port = port
+        clear
+      end
+
+      def namespace
+        @namespace ||= STATSD_NAMESPACE
+      end
+
+      def namespace=(namespace)
+        @namespace = namespace
+        clear
+      end
+
+      def tags
+        @tags ||= []
+      end
+
+      def tags=(tags)
+        @tags = tags
+        clear
+      end
+
+      private
+
+      def clear
+        @statsd && @statsd.close
+        @statsd = nil
+      end
+    end
+
+    class StatsdSubscriber < ActiveSupport::Subscriber
+      private
+
+      %w[increment histogram count timing gauge].each do |type|
+        define_method(type) do |*args|
+          emit(type, *args)
+        end
+      end
+
+      def emit(type, *args, tags: {})
+        tags = tags.map {|k, v| "#{k}:#{v}" }.to_a
+
+        Racecar::Datadog.statsd.send(type, *args, tags: tags)
+      end
+    end
+
+    class ConsumerSubscriber < StatsdSubscriber
+      def process_message(event)
+        offset = event.payload.fetch(:offset)
+        create_time = event.payload.fetch(:create_time)
+        time_lag = create_time && ((Time.now - create_time) * 1000).to_i
+        tags = default_tags(event)
+
+        if event.payload.key?(:exception)
+          increment("consumer.process_message.errors", tags: tags)
+        else
+          timing("consumer.process_message.latency", event.duration, tags: tags)
+          increment("consumer.messages", tags: tags)
+        end
+
+        gauge("consumer.offset", offset, tags: tags)
+
+        # Not all messages have timestamps.
+        if time_lag
+          gauge("consumer.time_lag", time_lag, tags: tags)
+        end
+      end
+
+      def process_batch(event)
+        offset = event.payload.fetch(:last_offset)
+        messages = event.payload.fetch(:message_count)
+        last_create_time = event.payload.fetch(:last_create_time)
+        time_lag = last_create_time && ((Time.now - last_create_time) * 1000).to_i
+        tags = default_tags(event)
+
+        if event.payload.key?(:exception)
+          increment("consumer.process_batch.errors", tags: tags)
+        else
+          timing("consumer.process_batch.latency", event.duration, tags: tags)
+          count("consumer.messages", messages, tags: tags)
+        end
+
+        histogram("consumer.batch_size", messages, tags: tags)
+        gauge("consumer.offset", offset, tags: tags)
+
+        if time_lag
+          gauge("consumer.time_lag", time_lag, tags: tags)
+        end
+      end
+
+      def join_group(event)
+        tags = {
+          client: event.payload.fetch(:client_id),
+          group_id: event.payload.fetch(:group_id),
+        }
+
+        timing("consumer.join_group", event.duration, tags: tags)
+
+        if event.payload.key?(:exception)
+          increment("consumer.join_group.errors", tags: tags)
+        end
+      end
+
+      def leave_group(event)
+        tags = {
+          client: event.payload.fetch(:client_id),
+          group_id: event.payload.fetch(:group_id),
+        }
+
+        timing("consumer.leave_group", event.duration, tags: tags)
+
+        if event.payload.key?(:exception)
+          increment("consumer.leave_group.errors", tags: tags)
+        end
+      end
+
+      def poll_retry(event)
+        tags = {
+          client: event.payload.fetch(:client_id),
+          group_id: event.payload.fetch(:group_id),
+        }
+        rdkafka_error_code = event.payload.fetch(:exception).code.to_s.gsub(/\W/, '')
+        increment("consumer.poll.rdkafka_error.#{rdkafka_error_code}", tags: tags)
+      end
+
+      def main_loop(event)
+        tags = {
+          client: event.payload.fetch(:client_id),
+          group_id: event.payload.fetch(:group_id),
+        }
+
+        histogram("consumer.loop.duration", event.duration, tags: tags)
+      end
+
+      def pause_status(event)
+        duration = event.payload.fetch(:duration)
+
+        gauge("consumer.pause.duration", duration, tags: default_tags(event))
+      end
+
+      private
+
+      def default_tags(event)
+        {
+          client: event.payload.fetch(:client_id),
+          group_id: event.payload.fetch(:group_id),
+          topic: event.payload.fetch(:topic),
+          partition: event.payload.fetch(:partition),
+        }
+      end
+
+      attach_to "racecar"
+    end
+
+    class ProducerSubscriber < StatsdSubscriber
+      def produce_message(event)
+        client = event.payload.fetch(:client_id)
+        topic = event.payload.fetch(:topic)
+        message_size = event.payload.fetch(:message_size)
+        buffer_size = event.payload.fetch(:buffer_size)
+
+        tags = {
+          client: client,
+          topic: topic,
+        }
+
+        # This gets us the write rate.
+        increment("producer.produce.messages", tags: tags.merge(topic: topic))
+
+        # Information about typical/average/95p message size.
+        histogram("producer.produce.message_size", message_size, tags: tags.merge(topic: topic))
+
+        # Aggregate message size.
+        count("producer.produce.message_size.sum", message_size, tags: tags.merge(topic: topic))
+
+        # This gets us the avg/max buffer size per producer.
+        histogram("producer.buffer.size", buffer_size, tags: tags)
+      end
+
+      def deliver_messages(event)
+        client = event.payload.fetch(:client_id)
+        message_count = event.payload.fetch(:delivered_message_count)
+
+        tags = {
+          client: client,
+        }
+
+        timing("producer.deliver.latency", event.duration, tags: tags)
+
+        # Messages delivered to Kafka:
+        count("producer.deliver.messages", message_count, tags: tags)
+      end
+
+      def acknowledged_message(event)
+        tags = { client: event.payload.fetch(:client_id) }
+
+        # Number of messages ACK'd for the topic.
+        increment("producer.ack.messages", tags: tags)
+      end
+
+      attach_to "racecar"
+    end
+  end
+end
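
The subscribers attach to the "racecar" instrumentation namespace, so metrics flow as soon as this file is required and a statsd agent is reachable. Configuration happens once at boot, e.g. in an initializer; the host, namespace, and tag values below are hypothetical, and each setter resets the memoized statsd client as shown in the diff:

    require "racecar/datadog"

    Racecar::Datadog.configure do |datadog|
      datadog.host = "127.0.0.1"          # dogstatsd agent address
      datadog.port = 8125
      datadog.namespace = "myapp.racecar" # defaults to "racecar"
      datadog.tags = ["env:production"]
    end
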