racecar 2.0.0 → 2.10.0.beta2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +17 -0
  3. data/.github/workflows/ci.yml +46 -0
  4. data/.github/workflows/publish.yml +12 -0
  5. data/.gitignore +1 -2
  6. data/CHANGELOG.md +83 -1
  7. data/Dockerfile +9 -0
  8. data/Gemfile +6 -0
  9. data/Gemfile.lock +72 -0
  10. data/README.md +303 -82
  11. data/Rakefile +5 -0
  12. data/docker-compose.yml +65 -0
  13. data/examples/batch_consumer.rb +4 -2
  14. data/examples/cat_consumer.rb +2 -0
  15. data/examples/producing_consumer.rb +2 -0
  16. data/exe/racecar +37 -14
  17. data/extra/datadog-dashboard.json +1 -0
  18. data/lib/ensure_hash_compact.rb +2 -0
  19. data/lib/generators/racecar/consumer_generator.rb +2 -0
  20. data/lib/generators/racecar/install_generator.rb +2 -0
  21. data/lib/racecar/cli.rb +26 -21
  22. data/lib/racecar/config.rb +80 -4
  23. data/lib/racecar/consumer.rb +51 -6
  24. data/lib/racecar/consumer_set.rb +113 -44
  25. data/lib/racecar/ctl.rb +31 -3
  26. data/lib/racecar/daemon.rb +4 -2
  27. data/lib/racecar/datadog.rb +83 -3
  28. data/lib/racecar/delivery_callback.rb +27 -0
  29. data/lib/racecar/erroneous_state_error.rb +34 -0
  30. data/lib/racecar/heroku.rb +49 -0
  31. data/lib/racecar/instrumenter.rb +4 -7
  32. data/lib/racecar/liveness_probe.rb +78 -0
  33. data/lib/racecar/message.rb +6 -1
  34. data/lib/racecar/message_delivery_error.rb +112 -0
  35. data/lib/racecar/null_instrumenter.rb +2 -0
  36. data/lib/racecar/parallel_runner.rb +110 -0
  37. data/lib/racecar/pause.rb +8 -4
  38. data/lib/racecar/producer.rb +139 -0
  39. data/lib/racecar/rails_config_file_loader.rb +7 -1
  40. data/lib/racecar/rebalance_listener.rb +58 -0
  41. data/lib/racecar/runner.rb +79 -37
  42. data/lib/racecar/version.rb +3 -1
  43. data/lib/racecar.rb +36 -8
  44. data/racecar.gemspec +7 -4
  45. metadata +47 -25
  46. data/.github/workflows/rspec.yml +0 -24
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Racecar
2
4
  class ConsumerSet
3
5
  MAX_POLL_TRIES = 10
@@ -10,53 +12,50 @@ module Racecar
10
12
  @consumers = []
11
13
  @consumer_id_iterator = (0...@config.subscriptions.size).cycle
12
14
 
15
+ @previous_retries = 0
16
+
13
17
  @last_poll_read_nil_message = false
18
+ @paused_tpls = Hash.new { |h, k| h[k] = {} }
14
19
  end
15
20
 
16
- def poll(timeout_ms)
17
- maybe_select_next_consumer
18
- started_at ||= Time.now
19
- try ||= 0
20
- remain ||= timeout_ms
21
-
22
- msg = remain <= 0 ? nil : current.poll(remain)
23
- rescue Rdkafka::RdkafkaError => e
24
- wait_before_retry_ms = 100 * (2**try) # 100ms, 200ms, 400ms, …
25
- try += 1
26
- raise if try >= MAX_POLL_TRIES || remain <= wait_before_retry_ms
27
-
28
- @logger.error "(try #{try}): Error for topic subscription #{current_subscription}: #{e}"
29
-
30
- case e.code
31
- when :max_poll_exceeded, :transport # -147, -195
32
- reset_current_consumer
33
- end
34
-
35
- remain = remaining_time_ms(timeout_ms, started_at)
36
- raise if remain <= wait_before_retry_ms
37
-
38
- sleep wait_before_retry_ms/1000.0
39
- retry
40
- ensure
41
- @last_poll_read_nil_message = true if msg.nil?
21
+ def poll(max_wait_time_ms = @config.max_wait_time_ms)
22
+ batch_poll(max_wait_time_ms, 1).first
42
23
  end
43
24
 
44
- # XXX: messages are not guaranteed to be from the same partition
45
- def batch_poll(timeout_ms)
46
- @batch_started_at = Time.now
47
- @messages = []
48
- while collect_messages_for_batch? do
49
- remain = remaining_time_ms(timeout_ms, @batch_started_at)
50
- break if remain <= 0
51
- msg = poll(remain)
25
+ # batch_poll collects messages until any of the following occurs:
26
+ # - max_wait_time_ms time has passed
27
+ # - max_messages have been collected
28
+ # - a nil message was polled (end of topic, Kafka stalled, etc.)
29
+ #
30
+ # The messages are from a single topic, but potentially from more than one partition.
31
+ #
32
+ # Any errors during polling are retried in an exponential backoff fashion. If an error
33
+ # occurs, but there is no time left for a backoff and retry, it will return the
34
+ # already collected messages and only retry on the next call.
35
+ def batch_poll(max_wait_time_ms = @config.max_wait_time_ms, max_messages = @config.fetch_messages)
36
+ started_at = Time.now
37
+ remain_ms = max_wait_time_ms
38
+ maybe_select_next_consumer
39
+ messages = []
40
+
41
+ while remain_ms > 0 && messages.size < max_messages
42
+ remain_ms = remaining_time_ms(max_wait_time_ms, started_at)
43
+ msg = poll_with_retries(remain_ms)
52
44
  break if msg.nil?
53
- @messages << msg
45
+ messages << msg
54
46
  end
55
- @messages
47
+
48
+ messages
56
49
  end
57
50
 
58
51
  def store_offset(message)
59
52
  current.store_offset(message)
53
+ rescue Rdkafka::RdkafkaError => e
54
+ if e.code == :state # -172
55
+ @logger.warn "Attempted to store_offset, but we're not subscribed to it: #{ErroneousStateError.new(e)}"
56
+ return
57
+ end
58
+ raise e
60
59
  end
61
60
 
62
61
  def commit
@@ -67,11 +66,17 @@ module Racecar
67
66
 
68
67
  def close
69
68
  each_subscribed(&:close)
69
+ @paused_tpls.clear
70
70
  end
71
71
 
72
72
  def current
73
73
  @consumers[@consumer_id_iterator.peek] ||= begin
74
- consumer = Rdkafka::Config.new(rdkafka_config(current_subscription)).consumer
74
+ consumer_config = Rdkafka::Config.new(rdkafka_config(current_subscription))
75
+ listener = RebalanceListener.new(@config.consumer_class, @instrumenter)
76
+ consumer_config.consumer_rebalance_listener = listener
77
+ consumer = consumer_config.consumer
78
+ listener.rdkafka_consumer = consumer
79
+
75
80
  @instrumenter.instrument('join_group') do
76
81
  consumer.subscribe current_subscription.topic
77
82
  end
@@ -97,16 +102,25 @@ module Racecar
97
102
  consumer.pause(filtered_tpl)
98
103
  fake_msg = OpenStruct.new(topic: topic, partition: partition, offset: offset)
99
104
  consumer.seek(fake_msg)
105
+
106
+ @paused_tpls[topic][partition] = [consumer, filtered_tpl]
100
107
  end
101
108
 
102
109
  def resume(topic, partition)
103
110
  consumer, filtered_tpl = find_consumer_by(topic, partition)
111
+
112
+ if !consumer && @paused_tpls[topic][partition]
113
+ consumer, filtered_tpl = @paused_tpls[topic][partition]
114
+ end
115
+
104
116
  if !consumer
105
117
  @logger.info "Attempted to resume #{topic}/#{partition}, but we're not subscribed to it"
106
118
  return
107
119
  end
108
120
 
109
121
  consumer.resume(filtered_tpl)
122
+ @paused_tpls[topic].delete(partition)
123
+ @paused_tpls.delete(topic) if @paused_tpls[topic].empty?
110
124
  end
111
125
 
112
126
  alias :each :each_subscribed
@@ -123,6 +137,55 @@ module Racecar
123
137
 
124
138
  private
125
139
 
140
+ # polls a single message from the current consumer, retrying errors with exponential
141
+ # backoff. The sleep time is capped by max_wait_time_ms. If there's enough time budget
142
+ # left, it will retry before returning. If there isn't, the retry will only occur on
143
+ # the next call. It tries up to MAX_POLL_TRIES before passing on the exception.
144
+ def poll_with_retries(max_wait_time_ms)
145
+ try ||= @previous_retries
146
+ @previous_retries = 0
147
+ started_at ||= Time.now
148
+ remain_ms = remaining_time_ms(max_wait_time_ms, started_at)
149
+
150
+ wait_ms = try == 0 ? 0 : 50 * (2**try) # 0ms, 100ms, 200ms, 400ms, …
151
+ if wait_ms >= max_wait_time_ms && remain_ms > 1
152
+ @logger.debug "Capping #{wait_ms}ms to #{max_wait_time_ms-1}ms."
153
+ sleep (max_wait_time_ms-1)/1000.0
154
+ remain_ms = 1
155
+ elsif try == 0 && remain_ms == 0
156
+ @logger.debug "No time remains for polling messages. Will try on next call."
157
+ return nil
158
+ elsif wait_ms >= remain_ms
159
+ @logger.warn "Only #{remain_ms}ms left, but want to wait for #{wait_ms}ms before poll. Will retry on next call."
160
+ @previous_retries = try
161
+ return nil
162
+ elsif wait_ms > 0
163
+ sleep wait_ms/1000.0
164
+ remain_ms -= wait_ms
165
+ end
166
+
167
+ poll_current_consumer(remain_ms)
168
+ rescue Rdkafka::RdkafkaError => e
169
+ try += 1
170
+ @instrumenter.instrument("poll_retry", try: try, rdkafka_time_limit: remain_ms, exception: e)
171
+ @logger.error "(try #{try}/#{MAX_POLL_TRIES}): Error for topic subscription #{current_subscription}: #{e}"
172
+ raise if try >= MAX_POLL_TRIES
173
+ retry
174
+ end
175
+
176
+ # polls a message for the current consumer, handling any API edge cases.
177
+ def poll_current_consumer(max_wait_time_ms)
178
+ msg = current.poll(max_wait_time_ms)
179
+ rescue Rdkafka::RdkafkaError => e
180
+ case e.code
181
+ when :max_poll_exceeded, :transport, :not_coordinator # -147, -195, 16
182
+ reset_current_consumer
183
+ end
184
+ raise
185
+ ensure
186
+ @last_poll_read_nil_message = msg.nil?
187
+ end
188
+
126
189
  def find_consumer_by(topic, partition)
127
190
  each do |consumer|
128
191
  tpl = consumer.assignment.to_h
@@ -140,7 +203,12 @@ module Racecar
140
203
  end
141
204
 
142
205
  def reset_current_consumer
143
- @consumers[@consumer_id_iterator.peek] = nil
206
+ current_consumer_id = @consumer_id_iterator.peek
207
+ @logger.info "Resetting consumer with id: #{current_consumer_id}"
208
+
209
+ consumer = @consumers[current_consumer_id]
210
+ consumer.close unless consumer.nil?
211
+ @consumers[current_consumer_id] = nil
144
212
  end
145
213
 
146
214
  def maybe_select_next_consumer
@@ -160,14 +228,14 @@ module Racecar
160
228
  @logger.debug "Nothing to commit."
161
229
  end
162
230
 
163
- def collect_messages_for_batch?
164
- @messages.size < @config.fetch_messages &&
165
- (Time.now - @batch_started_at) < @config.max_wait_time
166
- end
167
-
168
231
  def rdkafka_config(subscription)
169
232
  # https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
170
233
  config = {
234
+ # Manually store offset after messages have been processed successfully
235
+ # to avoid marking failed messages as committed. The call just updates
236
+ # a value within librdkafka and is asynchronously written to proper
237
+ # storage through auto commits.
238
+ "enable.auto.offset.store" => false,
171
239
  "auto.commit.interval.ms" => @config.offset_commit_interval * 1000,
172
240
  "auto.offset.reset" => subscription.start_from_beginning ? "earliest" : "largest",
173
241
  "bootstrap.servers" => @config.brokers.join(","),
@@ -183,7 +251,8 @@ module Racecar
183
251
  "queued.min.messages" => @config.min_message_queue_size,
184
252
  "session.timeout.ms" => @config.session_timeout * 1000,
185
253
  "socket.timeout.ms" => @config.socket_timeout * 1000,
186
- "statistics.interval.ms" => 1000, # 1s is the highest granularity offered
254
+ "statistics.interval.ms" => @config.statistics_interval_ms,
255
+ "partition.assignment.strategy" => @config.partition_assignment_strategy,
187
256
  }
188
257
  config.merge! @config.rdkafka_consumer
189
258
  config.merge! subscription.additional_config
data/lib/racecar/ctl.rb CHANGED
@@ -1,6 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "optparse"
2
4
  require "racecar/rails_config_file_loader"
3
5
  require "racecar/daemon"
6
+ require "racecar/message_delivery_error"
4
7
 
5
8
  module Racecar
6
9
  class Ctl
@@ -29,6 +32,21 @@ module Racecar
29
32
  @command = command
30
33
  end
31
34
 
35
+ def liveness_probe(args)
36
+ require "racecar/liveness_probe"
37
+ parse_options!(args)
38
+
39
+ if ENV["RAILS_ENV"] && File.exist?("config/racecar.yml")
40
+ Racecar.config.load_file("config/racecar.yml", ENV["RAILS_ENV"])
41
+ end
42
+
43
+ if File.exist?("config/racecar.rb")
44
+ require "./config/racecar"
45
+ end
46
+
47
+ Racecar.config.liveness_probe.check_liveness_within_interval!
48
+ end
49
+
32
50
  def status(args)
33
51
  parse_options!(args)
34
52
 
@@ -94,11 +112,17 @@ module Racecar
94
112
  Racecar.config.validate!
95
113
 
96
114
  producer = Rdkafka::Config.new({
97
- "bootstrap.servers": Racecar.config.brokers.join(","),
98
- "client.id": Racecar.config.client_id,
115
+ "bootstrap.servers": Racecar.config.brokers.join(","),
116
+ "client.id": Racecar.config.client_id,
117
+ "message.timeout.ms": Racecar.config.message_timeout * 1000,
99
118
  }.merge(Racecar.config.rdkafka_producer)).producer
100
119
 
101
- producer.produce(payload: message.value, key: message.key, topic: message.topic).wait
120
+ handle = producer.produce(payload: message.value, key: message.key, topic: message.topic)
121
+ begin
122
+ handle.wait(max_wait_timeout: Racecar.config.message_timeout)
123
+ rescue Rdkafka::RdkafkaError => e
124
+ raise MessageDeliveryError.new(e, handle)
125
+ end
102
126
 
103
127
  $stderr.puts "=> Delivered message to Kafka cluster"
104
128
  end
@@ -116,5 +140,9 @@ module Racecar
116
140
 
117
141
  parser.parse!(args)
118
142
  end
143
+
144
+ def config
145
+ Racecar.config
146
+ end
119
147
  end
120
148
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Racecar
2
4
  class Daemon
3
5
  attr_reader :pidfile
@@ -52,7 +54,7 @@ module Racecar
52
54
  end
53
55
 
54
56
  def pid
55
- if File.exists?(pidfile)
57
+ if File.exist?(pidfile)
56
58
  File.read(pidfile).to_i
57
59
  else
58
60
  nil
@@ -87,7 +89,7 @@ module Racecar
87
89
  end
88
90
 
89
91
  at_exit do
90
- File.delete(pidfile) if File.exists?(pidfile)
92
+ File.delete(pidfile) if File.exist?(pidfile)
91
93
  end
92
94
  rescue Errno::EEXIST
93
95
  check_pid
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  begin
2
4
  require "datadog/statsd"
3
5
  rescue LoadError
@@ -61,10 +63,14 @@ module Racecar
61
63
  clear
62
64
  end
63
65
 
66
+ def close
67
+ @statsd&.close
68
+ end
69
+
64
70
  private
65
71
 
66
72
  def clear
67
- @statsd && @statsd.close
73
+ close
68
74
  @statsd = nil
69
75
  end
70
76
  end
@@ -73,8 +79,8 @@ module Racecar
73
79
  private
74
80
 
75
81
  %w[increment histogram count timing gauge].each do |type|
76
- define_method(type) do |*args|
77
- emit(type, *args)
82
+ define_method(type) do |*args, **kwargs|
83
+ emit(type, *args, **kwargs)
78
84
  end
79
85
  end
80
86
 
@@ -155,6 +161,15 @@ module Racecar
155
161
  end
156
162
  end
157
163
 
164
+ def poll_retry(event)
165
+ tags = {
166
+ client: event.payload.fetch(:client_id),
167
+ group_id: event.payload.fetch(:group_id),
168
+ }
169
+ rdkafka_error_code = event.payload.fetch(:exception).code.to_s.gsub(/\W/, '')
170
+ increment("consumer.poll.rdkafka_error.#{rdkafka_error_code}", tags: tags)
171
+ end
172
+
158
173
  def main_loop(event)
159
174
  tags = {
160
175
  client: event.payload.fetch(:client_id),
@@ -196,6 +211,10 @@ module Racecar
196
211
  topic: topic,
197
212
  }
198
213
 
214
+ if event.payload.key?(:exception)
215
+ increment("producer.produce.errors", tags: tags)
216
+ end
217
+
199
218
  # This gets us the write rate.
200
219
  increment("producer.produce.messages", tags: tags.merge(topic: topic))
201
220
 
@@ -230,6 +249,67 @@ module Racecar
230
249
  increment("producer.ack.messages", tags: tags)
231
250
  end
232
251
 
252
+ def produce_delivery_error(event)
253
+ tags = {
254
+ client: event.payload.fetch(:client_id),
255
+ }
256
+
257
+ increment("producer.produce.delivery.errors", tags: tags)
258
+ end
259
+
260
+ def produce_async(event)
261
+ client = event.payload.fetch(:client_id)
262
+ topic = event.payload.fetch(:topic)
263
+ message_size = event.payload.fetch(:message_size)
264
+ buffer_size = event.payload.fetch(:buffer_size)
265
+
266
+ tags = {
267
+ client: client,
268
+ topic: topic,
269
+ }
270
+
271
+ if event.payload.key?(:exception)
272
+ increment("producer.produce.errors", tags: tags)
273
+ end
274
+
275
+ # This gets us the write rate.
276
+ increment("producer.produce.messages", tags: tags.merge(topic: topic))
277
+
278
+ # Information about typical/average/95p message size.
279
+ histogram("producer.produce.message_size", message_size, tags: tags.merge(topic: topic))
280
+
281
+ # Aggregate message size.
282
+ count("producer.produce.message_size.sum", message_size, tags: tags.merge(topic: topic))
283
+
284
+ # This gets us the avg/max buffer size per producer.
285
+ histogram("producer.buffer.size", buffer_size, tags: tags)
286
+ end
287
+
288
+ def produce_sync(event)
289
+ client = event.payload.fetch(:client_id)
290
+ topic = event.payload.fetch(:topic)
291
+ message_size = event.payload.fetch(:message_size)
292
+
293
+ tags = {
294
+ client: client,
295
+ topic: topic,
296
+ }
297
+
298
+ if event.payload.key?(:exception)
299
+ increment("producer.produce.errors", tags: tags)
300
+ end
301
+
302
+
303
+ # This gets us the write rate.
304
+ increment("producer.produce.messages", tags: tags.merge(topic: topic))
305
+
306
+ # Information about typical/average/95p message size.
307
+ histogram("producer.produce.message_size", message_size, tags: tags.merge(topic: topic))
308
+
309
+ # Aggregate message size.
310
+ count("producer.produce.message_size.sum", message_size, tags: tags.merge(topic: topic))
311
+ end
312
+
233
313
  attach_to "racecar"
234
314
  end
235
315
  end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Racecar
4
+ class DeliveryCallback
5
+ attr_reader :instrumenter
6
+
7
+ def initialize(instrumenter:)
8
+ @instrumenter = instrumenter
9
+ end
10
+
11
+ def call(delivery_report)
12
+ if delivery_report.error.to_i.zero?
13
+ payload = {
14
+ offset: delivery_report.offset,
15
+ partition: delivery_report.partition
16
+ }
17
+ instrumenter.instrument("acknowledged_message", payload)
18
+ else
19
+ payload = {
20
+ partition: delivery_report.partition,
21
+ exception: delivery_report.error
22
+ }
23
+ instrumenter.instrument("produce_delivery_error", payload)
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,34 @@
1
+ # `rd_kafka_offsets_store()` (et.al) returns an error for any
2
+ # partition that is not currently assigned (through `rd_kafka_*assign()`).
3
+ # This prevents a race condition where an application would store offsets
4
+ # after the assigned partitions had been revoked (which resets the stored
5
+ # offset), that could cause these old stored offsets to be committed later
6
+ # when the same partitions were assigned to this consumer again - effectively
7
+ # overwriting any committed offsets by any consumers that were assigned the
8
+ # same partitions previously. This would typically result in the offsets
9
+ # rewinding and messages to be reprocessed.
10
+ # As an extra effort to avoid this situation the stored offset is now
11
+ # also reset when partitions are assigned (through `rd_kafka_*assign()`).
12
+ module Racecar
13
+ class ErroneousStateError < StandardError
14
+ def initialize(rdkafka_error)
15
+ raise rdkafka_error unless rdkafka_error.is_a?(Rdkafka::RdkafkaError)
16
+
17
+ @rdkafka_error = rdkafka_error
18
+ end
19
+
20
+ attr_reader :rdkafka_error
21
+
22
+ def code
23
+ @rdkafka_error.code
24
+ end
25
+
26
+ def to_s
27
+ <<~EOM
28
+ Partition is no longer assigned to this consumer and the offset could not be stored for commit.
29
+ #{@rdkafka_error.to_s}
30
+ EOM
31
+ end
32
+
33
+ end
34
+ end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'tempfile'
4
+
5
+ # Heroku Kafka addon provides 4 ENVs to connect to their Kafka Broker
6
+ # KAFKA_TRUSTED_CERT, KAFKA_CLIENT_CERT, KAFKA_CLIENT_CERT_KEY, KAFKA_URL
7
+ # This will work only if the Heroku Kafka add-on is aliased to "KAFKA"
8
+
9
+ $stderr.puts "=> Loading configuration from Heroku Kafka ENVs"
10
+
11
+ module Racecar
12
+ module Heroku
13
+ def self.load_configuration!
14
+ [
15
+ "KAFKA_URL",
16
+ "KAFKA_TRUSTED_CERT",
17
+ "KAFKA_CLIENT_CERT",
18
+ "KAFKA_CLIENT_CERT_KEY"
19
+ ]. each do |env_name|
20
+ if ENV[env_name].nil?
21
+ $stderr.puts "Error: ENV #{env_name} is not set"
22
+ exit 1
23
+ end
24
+ end
25
+
26
+ Racecar.configure do |config|
27
+ ca_cert = ENV["KAFKA_TRUSTED_CERT"]
28
+ client_cert = ENV["KAFKA_CLIENT_CERT"]
29
+ client_cert_key = ENV["KAFKA_CLIENT_CERT_KEY"]
30
+
31
+ tmp_file_path = lambda do |data|
32
+ tempfile = Tempfile.new(['', '.pem'])
33
+ tempfile << data
34
+ tempfile.close
35
+ tempfile.path
36
+ end
37
+
38
+ config.security_protocol = :ssl
39
+ config.ssl_ca_location = tmp_file_path.call(ca_cert)
40
+ config.ssl_certificate_location = tmp_file_path.call(client_cert)
41
+ config.ssl_key_location = tmp_file_path.call(client_cert_key)
42
+
43
+ config.brokers = ENV["KAFKA_URL"].to_s.gsub('kafka+ssl://', '').split(',')
44
+ end
45
+ end
46
+ end
47
+ end
48
+
49
+ Racecar::Heroku.load_configuration!
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Racecar
2
4
  ##
3
5
  # Common API for instrumentation to standardize
@@ -7,14 +9,9 @@ module Racecar
7
9
  NAMESPACE = "racecar"
8
10
  attr_reader :backend
9
11
 
10
- def initialize(default_payload = {})
12
+ def initialize(backend:, default_payload: {})
13
+ @backend = backend
11
14
  @default_payload = default_payload
12
-
13
- @backend = if defined?(ActiveSupport::Notifications)
14
- ActiveSupport::Notifications
15
- else
16
- NullInstrumenter
17
- end
18
15
  end
19
16
 
20
17
  def instrument(event_name, payload = {}, &block)
@@ -0,0 +1,78 @@
1
+ require "fileutils"
2
+
3
+ module Racecar
4
+ class LivenessProbe
5
+ def initialize(message_bus, file_path, max_interval)
6
+ @message_bus = message_bus
7
+ @file_path = file_path
8
+ @max_interval = max_interval
9
+ @subscribers = []
10
+ end
11
+
12
+ attr_reader :message_bus, :file_path, :max_interval, :subscribers
13
+ private :message_bus, :file_path, :max_interval, :subscribers
14
+
15
+ def check_liveness_within_interval!
16
+ unless liveness_event_within_interval?
17
+ $stderr.puts "Racecar healthcheck failed: No liveness within interval #{max_interval}s. Last liveness at #{last_liveness_event_at}, #{elapsed_since_liveness_event} seconds ago."
18
+ Process.exit(1)
19
+ end
20
+ end
21
+
22
+ def liveness_event_within_interval?
23
+ elapsed_since_liveness_event < max_interval
24
+ rescue Errno::ENOENT
25
+ $stderr.puts "Racecar healthcheck failed: Liveness file not found `#{file_path}`"
26
+ Process.exit(1)
27
+ end
28
+
29
+ def install
30
+ unless file_path && file_writeable?
31
+ raise(
32
+ "Liveness probe configuration error: `liveness_probe_file_path` must be set to a writable file path.\n" \
33
+ " Set `RACECAR_LIVENESS_PROBE_FILE_PATH` and `RACECAR_LIVENESS_MAX_INTERVAL` environment variables."
34
+ )
35
+ end
36
+
37
+ subscribers << message_bus.subscribe("start_main_loop.racecar") do
38
+ touch_liveness_file
39
+ end
40
+
41
+ subscribers = message_bus.subscribe("shut_down.racecar") do
42
+ delete_liveness_file
43
+ end
44
+
45
+ nil
46
+ end
47
+
48
+ def uninstall
49
+ subscribers.each { |s| message_bus.unsubscribe(s) }
50
+ end
51
+
52
+ private
53
+
54
+ def elapsed_since_liveness_event
55
+ Time.now - last_liveness_event_at
56
+ end
57
+
58
+ def last_liveness_event_at
59
+ File.mtime(file_path)
60
+ end
61
+
62
+ def touch_liveness_file
63
+ FileUtils.touch(file_path)
64
+ end
65
+
66
+ def delete_liveness_file
67
+ FileUtils.rm_rf(file_path)
68
+ end
69
+
70
+ def file_writeable?
71
+ File.write(file_path, "")
72
+ File.unlink(file_path)
73
+ true
74
+ rescue
75
+ false
76
+ end
77
+ end
78
+ end
@@ -1,11 +1,16 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "forwardable"
2
4
 
3
5
  module Racecar
4
6
  class Message
5
7
  extend Forwardable
6
8
 
7
- def initialize(rdkafka_message)
9
+ attr_reader :retries_count
10
+
11
+ def initialize(rdkafka_message, retries_count: nil)
8
12
  @rdkafka_message = rdkafka_message
13
+ @retries_count = retries_count
9
14
  end
10
15
 
11
16
  def_delegators :@rdkafka_message, :topic, :partition, :offset, :key, :headers