racecar 0.5.0.beta2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/racecar/consumer.rb CHANGED
@@ -1,11 +1,13 @@
+# frozen_string_literal: true
+
 module Racecar
   class Consumer
-    Subscription = Struct.new(:topic, :start_from_beginning, :max_bytes_per_partition)
+    Subscription = Struct.new(:topic, :start_from_beginning, :max_bytes_per_partition, :additional_config)
 
     class << self
       attr_accessor :max_wait_time
       attr_accessor :group_id
-      attr_accessor :offset_retention_time
+      attr_accessor :producer, :consumer
 
       def subscriptions
         @subscriptions ||= []
@@ -20,29 +22,68 @@ module Racecar
       # of each partition.
       # @param max_bytes_per_partition [Integer] the maximum number of bytes to fetch from
       #   each partition at a time.
+      # @param additional_config [Hash] Configuration properties for consumer.
+      #   See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
       # @return [nil]
-      def subscribes_to(*topics, start_from_beginning: true, max_bytes_per_partition: 1048576)
+      def subscribes_to(*topics, start_from_beginning: true, max_bytes_per_partition: 1048576, additional_config: {})
         topics.each do |topic|
-          subscriptions << Subscription.new(topic, start_from_beginning, max_bytes_per_partition)
+          subscriptions << Subscription.new(topic, start_from_beginning, max_bytes_per_partition, additional_config)
         end
       end
     end
 
-    def configure(consumer:, producer:)
-      @_consumer = consumer
-      @_producer = producer
+    def configure(producer:, consumer:, instrumenter: NullInstrumenter)
+      @producer = producer
+      @consumer = consumer
+      @instrumenter = instrumenter
     end
 
     def teardown; end
 
+    # Delivers messages that got produced.
+    def deliver!
+      @delivery_handles ||= []
+      if @delivery_handles.any?
+        instrumentation_payload = { delivered_message_count: @delivery_handles.size }
+
+        @instrumenter.instrument('deliver_messages', instrumentation_payload) do
+          @delivery_handles.each(&:wait)
+        end
+      end
+      @delivery_handles.clear
+    end
+
     protected
 
-    def heartbeat
-      @_consumer.trigger_heartbeat
+    # https://github.com/appsignal/rdkafka-ruby#producing-messages
+    def produce(payload, topic:, key: nil, partition_key: nil, headers: nil, create_time: nil)
+      @delivery_handles ||= []
+      message_size = payload.respond_to?(:bytesize) ? payload.bytesize : 0
+      instrumentation_payload = {
+        value: payload,
+        headers: headers,
+        key: key,
+        partition_key: partition_key,
+        topic: topic,
+        message_size: message_size,
+        create_time: Time.now,
+        buffer_size: @delivery_handles.size,
+      }
+
+      @instrumenter.instrument("produce_message", instrumentation_payload) do
+        @delivery_handles << @producer.produce(
+          topic: topic,
+          payload: payload,
+          key: key,
+          partition_key: partition_key,
+          timestamp: create_time,
+          headers: headers,
+        )
+      end
     end
 
-    def produce(value, **options)
-      @_producer.produce(value, **options)
+    def heartbeat
+      warn "DEPRECATION WARNING: Manual heartbeats are not supported and not needed with librdkafka."
     end
   end
 end
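
Taken together, the Consumer changes swap ruby-kafka's synchronous produce call for rdkafka-style delivery handles: `produce` only buffers a handle, and `deliver!` blocks until every buffered handle has been acknowledged. A minimal sketch of a consumer against the new API; the class name, topic names, and config values are hypothetical:

    class GreetingsConsumer < Racecar::Consumer
      # Per-subscription librdkafka properties ride along via additional_config.
      subscribes_to "greetings", start_from_beginning: false,
        additional_config: { "fetch.error.backoff.ms" => 500 }

      def process(message)
        # produce buffers a delivery handle; deliver! waits on all of them.
        produce("Hello, #{message.value}!", topic: "replies", key: message.key)
        deliver!
      end
    end
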
data/lib/racecar/consumer_set.rb ADDED
@@ -0,0 +1,239 @@
+# frozen_string_literal: true
+
+module Racecar
+  class ConsumerSet
+    MAX_POLL_TRIES = 10
+
+    def initialize(config, logger, instrumenter = NullInstrumenter)
+      @config, @logger = config, logger
+      @instrumenter = instrumenter
+      raise ArgumentError, "Subscriptions must not be empty when subscribing" if @config.subscriptions.empty?
+
+      @consumers = []
+      @consumer_id_iterator = (0...@config.subscriptions.size).cycle
+
+      @previous_retries = 0
+
+      @last_poll_read_nil_message = false
+    end
+
+    def poll(max_wait_time_ms = @config.max_wait_time_ms)
+      batch_poll(max_wait_time_ms, 1).first
+    end
+
+    # batch_poll collects messages until any of the following occurs:
+    # - max_wait_time_ms time has passed
+    # - max_messages have been collected
+    # - a nil message was polled (end of topic, Kafka stalled, etc.)
+    #
+    # The messages are from a single topic, but potentially from more than one partition.
+    #
+    # Any errors during polling are retried in an exponential backoff fashion. If an error
+    # occurs, but there is no time left for a backoff and retry, it will return the
+    # already collected messages and only retry on the next call.
+    def batch_poll(max_wait_time_ms = @config.max_wait_time_ms, max_messages = @config.fetch_messages)
+      started_at = Time.now
+      remain_ms = max_wait_time_ms
+      maybe_select_next_consumer
+      messages = []
+
+      while remain_ms > 0 && messages.size < max_messages
+        remain_ms = remaining_time_ms(max_wait_time_ms, started_at)
+        msg = poll_with_retries(remain_ms)
+        break if msg.nil?
+        messages << msg
+      end
+
+      messages
+    end
+
+    def store_offset(message)
+      current.store_offset(message)
+    end
+
+    def commit
+      each_subscribed do |consumer|
+        commit_rescue_no_offset(consumer)
+      end
+    end
+
+    def close
+      each_subscribed(&:close)
+    end
+
+    def current
+      @consumers[@consumer_id_iterator.peek] ||= begin
+        consumer = Rdkafka::Config.new(rdkafka_config(current_subscription)).consumer
+        @instrumenter.instrument('join_group') do
+          consumer.subscribe current_subscription.topic
+        end
+        consumer
+      end
+    end
+
+    def each_subscribed
+      if block_given?
+        @consumers.each { |c| yield c }
+      else
+        @consumers.each
+      end
+    end
+
+    def pause(topic, partition, offset)
+      consumer, filtered_tpl = find_consumer_by(topic, partition)
+      if !consumer
+        @logger.info "Attempted to pause #{topic}/#{partition}, but we're not subscribed to it"
+        return
+      end
+
+      consumer.pause(filtered_tpl)
+      fake_msg = OpenStruct.new(topic: topic, partition: partition, offset: offset)
+      consumer.seek(fake_msg)
+    end
+
+    def resume(topic, partition)
+      consumer, filtered_tpl = find_consumer_by(topic, partition)
+      if !consumer
+        @logger.info "Attempted to resume #{topic}/#{partition}, but we're not subscribed to it"
+        return
+      end
+
+      consumer.resume(filtered_tpl)
+    end
+
+    alias :each :each_subscribed
+
+    # Subscribe to all topics eagerly, even if there's still messages elsewhere. Usually
+    # that's not needed and Kafka might rebalance if topics are not polled frequently
+    # enough.
+    def subscribe_all
+      @config.subscriptions.size.times do
+        current
+        select_next_consumer
+      end
+    end
+
+    private
+
+    # polls a single message from the current consumer, retrying errors with exponential
+    # backoff. The sleep time is capped by max_wait_time_ms. If there's enough time budget
+    # left, it will retry before returning. If there isn't, the retry will only occur on
+    # the next call. It tries up to MAX_POLL_TRIES before passing on the exception.
+    def poll_with_retries(max_wait_time_ms)
+      try ||= @previous_retries
+      @previous_retries = 0
+      started_at ||= Time.now
+      remain_ms = remaining_time_ms(max_wait_time_ms, started_at)
+
+      wait_ms = try == 0 ? 0 : 50 * (2**try) # 0ms, 100ms, 200ms, 400ms, …
+      if wait_ms >= max_wait_time_ms && remain_ms > 1
+        @logger.debug "Capping #{wait_ms}ms to #{max_wait_time_ms-1}ms."
+        sleep (max_wait_time_ms-1)/1000.0
+        remain_ms = 1
+      elsif try == 0 && remain_ms == 0
+        @logger.debug "No time remains for polling messages. Will try on next call."
+        return nil
+      elsif wait_ms >= remain_ms
+        @logger.error "Only #{remain_ms}ms left, but want to wait for #{wait_ms}ms before poll. Will retry on next call."
+        @previous_retries = try
+        return nil
+      elsif wait_ms > 0
+        sleep wait_ms/1000.0
+        remain_ms -= wait_ms
+      end
+
+      poll_current_consumer(remain_ms)
+    rescue Rdkafka::RdkafkaError => e
+      try += 1
+      @instrumenter.instrument("poll_retry", try: try, rdkafka_time_limit: remain_ms, exception: e)
+      @logger.error "(try #{try}/#{MAX_POLL_TRIES}): Error for topic subscription #{current_subscription}: #{e}"
+      raise if try >= MAX_POLL_TRIES
+      retry
+    end
+
+    # polls a message for the current consumer, handling any API edge cases.
+    def poll_current_consumer(max_wait_time_ms)
+      msg = current.poll(max_wait_time_ms)
+    rescue Rdkafka::RdkafkaError => e
+      case e.code
+      when :max_poll_exceeded, :transport # -147, -195
+        reset_current_consumer
+      end
+      raise
+    ensure
+      @last_poll_read_nil_message = msg.nil?
+    end
+
+    def find_consumer_by(topic, partition)
+      each do |consumer|
+        tpl = consumer.assignment.to_h
+        rdkafka_partition = tpl[topic]&.detect { |part| part.partition == partition }
+        next unless rdkafka_partition
+        filtered_tpl = Rdkafka::Consumer::TopicPartitionList.new({ topic => [rdkafka_partition] })
+        return consumer, filtered_tpl
+      end
+
+      return nil, nil
+    end
+
+    def current_subscription
+      @config.subscriptions[@consumer_id_iterator.peek]
+    end
+
+    def reset_current_consumer
+      current_consumer_id = @consumer_id_iterator.peek
+      @logger.info "Resetting consumer with id: #{current_consumer_id}"
+
+      consumer = @consumers[current_consumer_id]
+      consumer.close unless consumer.nil?
+      @consumers[current_consumer_id] = nil
+    end
+
+    def maybe_select_next_consumer
+      return unless @last_poll_read_nil_message
+      @last_poll_read_nil_message = false
+      select_next_consumer
+    end
+
+    def select_next_consumer
+      @consumer_id_iterator.next
+    end
+
+    def commit_rescue_no_offset(consumer)
+      consumer.commit(nil, !@config.synchronous_commits)
+    rescue Rdkafka::RdkafkaError => e
+      raise e if e.code != :no_offset
+      @logger.debug "Nothing to commit."
+    end
+
+    def rdkafka_config(subscription)
+      # https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
+      config = {
+        "auto.commit.interval.ms" => @config.offset_commit_interval * 1000,
+        "auto.offset.reset" => subscription.start_from_beginning ? "earliest" : "largest",
+        "bootstrap.servers" => @config.brokers.join(","),
+        "client.id" => @config.client_id,
+        "enable.partition.eof" => false,
+        "fetch.max.bytes" => @config.max_bytes,
+        "message.max.bytes" => subscription.max_bytes_per_partition,
+        "fetch.min.bytes" => @config.fetch_min_bytes,
+        "fetch.wait.max.ms" => @config.max_wait_time_ms,
+        "group.id" => @config.group_id,
+        "heartbeat.interval.ms" => @config.heartbeat_interval * 1000,
+        "max.poll.interval.ms" => @config.max_poll_interval * 1000,
+        "queued.min.messages" => @config.min_message_queue_size,
+        "session.timeout.ms" => @config.session_timeout * 1000,
+        "socket.timeout.ms" => @config.socket_timeout * 1000,
+        "statistics.interval.ms" => 1000, # 1s is the highest granularity offered
+      }
+      config.merge! @config.rdkafka_consumer
+      config.merge! subscription.additional_config
+      config
+    end
+
+    def remaining_time_ms(limit_ms, started_at_time)
+      r = limit_ms - ((Time.now - started_at_time)*1000).round
+      r <= 0 ? 0 : r
+    end
+  end
+end
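
The retry logic in poll_with_retries backs off by 50 * (2**try) milliseconds, capped by the remaining time budget. A quick illustration of the schedule that formula produces (plain arithmetic, no Racecar required):

    (0..4).each do |try|
      wait_ms = try.zero? ? 0 : 50 * (2**try)
      puts "try #{try}: wait #{wait_ms}ms"
    end
    # try 0: wait 0ms
    # try 1: wait 100ms
    # try 2: wait 200ms
    # try 3: wait 400ms
    # try 4: wait 800ms

By the ninth retry the uncapped wait would be 50 * 2**9 = 25600ms, which is why the first branch clamps the sleep to max_wait_time_ms - 1 and why a retry can be deferred to the next call.
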
data/lib/racecar/ctl.rb CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require "optparse"
 require "racecar/rails_config_file_loader"
 require "racecar/daemon"
@@ -93,15 +95,13 @@ module Racecar
 
       Racecar.config.validate!
 
-      kafka = Kafka.new(
-        client_id: Racecar.config.client_id,
-        seed_brokers: Racecar.config.brokers,
-        logger: Racecar.logger,
-        connect_timeout: Racecar.config.connect_timeout,
-        socket_timeout: Racecar.config.socket_timeout,
-      )
+      producer = Rdkafka::Config.new({
+        "bootstrap.servers": Racecar.config.brokers.join(","),
+        "client.id": Racecar.config.client_id,
+      }.merge(Racecar.config.rdkafka_producer)).producer
 
-      kafka.deliver_message(message.value, key: message.key, topic: message.topic)
+      handle = producer.produce(payload: message.value, key: message.key, topic: message.topic)
+      handle.wait(max_wait_timeout: 5)
 
       $stderr.puts "=> Delivered message to Kafka cluster"
     end
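
Note the shape change here: ruby-kafka's deliver_message was synchronous, while rdkafka's produce returns a delivery handle immediately, so the handle.wait(max_wait_timeout: 5) call is what restores the blocking behavior. A sketch of the timeout path, assuming rdkafka-ruby >= 0.8 (where wait raises Rdkafka::AbstractHandle::WaitTimeoutError); the topic and payload are illustrative:

    handle = producer.produce(topic: "greetings", payload: "hello")
    begin
      handle.wait(max_wait_timeout: 5)
    rescue Rdkafka::AbstractHandle::WaitTimeoutError
      # No broker acknowledgment within 5s. The message may still arrive
      # later, so blindly re-producing here risks duplicates.
    end
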
data/lib/racecar/daemon.rb CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 module Racecar
   class Daemon
     attr_reader :pidfile
data/lib/racecar/datadog.rb ADDED
@@ -0,0 +1,247 @@
+# frozen_string_literal: true
+
+begin
+  require "datadog/statsd"
+rescue LoadError
+  $stderr.puts "In order to report Kafka client metrics to Datadog you need to install the `dogstatsd-ruby` gem."
+  raise
+end
+
+require "active_support/subscriber"
+
+module Racecar
+  module Datadog
+    STATSD_NAMESPACE = "racecar"
+
+    class << self
+      def configure
+        yield self
+      end
+
+      def statsd
+        @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags)
+      end
+
+      def statsd=(statsd)
+        clear
+        @statsd = statsd
+      end
+
+      def host
+        @host
+      end
+
+      def host=(host)
+        @host = host
+        clear
+      end
+
+      def port
+        @port
+      end
+
+      def port=(port)
+        @port = port
+        clear
+      end
+
+      def namespace
+        @namespace ||= STATSD_NAMESPACE
+      end
+
+      def namespace=(namespace)
+        @namespace = namespace
+        clear
+      end
+
+      def tags
+        @tags ||= []
+      end
+
+      def tags=(tags)
+        @tags = tags
+        clear
+      end
+
+      private
+
+      def clear
+        @statsd && @statsd.close
+        @statsd = nil
+      end
+    end
+
+    class StatsdSubscriber < ActiveSupport::Subscriber
+      private
+
+      %w[increment histogram count timing gauge].each do |type|
+        define_method(type) do |*args|
+          emit(type, *args)
+        end
+      end
+
+      def emit(type, *args, tags: {})
+        tags = tags.map {|k, v| "#{k}:#{v}" }.to_a
+
+        Racecar::Datadog.statsd.send(type, *args, tags: tags)
+      end
+    end
+
+    class ConsumerSubscriber < StatsdSubscriber
+      def process_message(event)
+        offset = event.payload.fetch(:offset)
+        create_time = event.payload.fetch(:create_time)
+        time_lag = create_time && ((Time.now - create_time) * 1000).to_i
+        tags = default_tags(event)
+
+        if event.payload.key?(:exception)
+          increment("consumer.process_message.errors", tags: tags)
+        else
+          timing("consumer.process_message.latency", event.duration, tags: tags)
+          increment("consumer.messages", tags: tags)
+        end
+
+        gauge("consumer.offset", offset, tags: tags)
+
+        # Not all messages have timestamps.
+        if time_lag
+          gauge("consumer.time_lag", time_lag, tags: tags)
+        end
+      end
+
+      def process_batch(event)
+        offset = event.payload.fetch(:last_offset)
+        messages = event.payload.fetch(:message_count)
+        last_create_time = event.payload.fetch(:last_create_time)
+        time_lag = last_create_time && ((Time.now - last_create_time) * 1000).to_i
+        tags = default_tags(event)
+
+        if event.payload.key?(:exception)
+          increment("consumer.process_batch.errors", tags: tags)
+        else
+          timing("consumer.process_batch.latency", event.duration, tags: tags)
+          count("consumer.messages", messages, tags: tags)
+        end
+
+        histogram("consumer.batch_size", messages, tags: tags)
+        gauge("consumer.offset", offset, tags: tags)
+
+        if time_lag
+          gauge("consumer.time_lag", time_lag, tags: tags)
+        end
+      end
+
+      def join_group(event)
+        tags = {
+          client: event.payload.fetch(:client_id),
+          group_id: event.payload.fetch(:group_id),
+        }
+
+        timing("consumer.join_group", event.duration, tags: tags)
+
+        if event.payload.key?(:exception)
+          increment("consumer.join_group.errors", tags: tags)
+        end
+      end
+
+      def leave_group(event)
+        tags = {
+          client: event.payload.fetch(:client_id),
+          group_id: event.payload.fetch(:group_id),
+        }
+
+        timing("consumer.leave_group", event.duration, tags: tags)
+
+        if event.payload.key?(:exception)
+          increment("consumer.leave_group.errors", tags: tags)
+        end
+      end
+
+      def poll_retry(event)
+        tags = {
+          client: event.payload.fetch(:client_id),
+          group_id: event.payload.fetch(:group_id),
+        }
+        rdkafka_error_code = event.payload.fetch(:exception).code.to_s.gsub(/\W/, '')
+        increment("consumer.poll.rdkafka_error.#{rdkafka_error_code}", tags: tags)
+      end
+
+      def main_loop(event)
+        tags = {
+          client: event.payload.fetch(:client_id),
+          group_id: event.payload.fetch(:group_id),
+        }
+
+        histogram("consumer.loop.duration", event.duration, tags: tags)
+      end
+
+      def pause_status(event)
+        duration = event.payload.fetch(:duration)
+
+        gauge("consumer.pause.duration", duration, tags: default_tags(event))
+      end
+
+      private
+
+      def default_tags(event)
+        {
+          client: event.payload.fetch(:client_id),
+          group_id: event.payload.fetch(:group_id),
+          topic: event.payload.fetch(:topic),
+          partition: event.payload.fetch(:partition),
+        }
+      end
+
+      attach_to "racecar"
+    end
+
+    class ProducerSubscriber < StatsdSubscriber
+      def produce_message(event)
+        client = event.payload.fetch(:client_id)
+        topic = event.payload.fetch(:topic)
+        message_size = event.payload.fetch(:message_size)
+        buffer_size = event.payload.fetch(:buffer_size)
+
+        tags = {
+          client: client,
+          topic: topic,
+        }
+
+        # This gets us the write rate.
+        increment("producer.produce.messages", tags: tags.merge(topic: topic))
+
+        # Information about typical/average/95p message size.
+        histogram("producer.produce.message_size", message_size, tags: tags.merge(topic: topic))
+
+        # Aggregate message size.
+        count("producer.produce.message_size.sum", message_size, tags: tags.merge(topic: topic))
+
+        # This gets us the avg/max buffer size per producer.
+        histogram("producer.buffer.size", buffer_size, tags: tags)
+      end
+
+      def deliver_messages(event)
+        client = event.payload.fetch(:client_id)
+        message_count = event.payload.fetch(:delivered_message_count)
+
+        tags = {
+          client: client,
+        }
+
+        timing("producer.deliver.latency", event.duration, tags: tags)
+
+        # Messages delivered to Kafka:
+        count("producer.deliver.messages", message_count, tags: tags)
+      end
+
+      def acknowledged_message(event)
+        tags = { client: event.payload.fetch(:client_id) }
+
+        # Number of messages ACK'd for the topic.
+        increment("producer.ack.messages", tags: tags)
+      end
+
+      attach_to "racecar"
+    end
+  end
+end
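
The subscribers attach to the "racecar" instrumentation namespace, so metrics flow as soon as this file is required and a statsd agent is reachable. Configuration happens once at boot, e.g. in an initializer; the host, namespace, and tag values below are hypothetical, and each setter resets the memoized statsd client as shown in the diff:

    require "racecar/datadog"

    Racecar::Datadog.configure do |datadog|
      datadog.host = "127.0.0.1"          # dogstatsd agent address
      datadog.port = 8125
      datadog.namespace = "myapp.racecar" # defaults to "racecar"
      datadog.tags = ["env:production"]
    end
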