racecar 2.0.0 → 2.10.0.beta2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +17 -0
  3. data/.github/workflows/ci.yml +46 -0
  4. data/.github/workflows/publish.yml +12 -0
  5. data/.gitignore +1 -2
  6. data/CHANGELOG.md +83 -1
  7. data/Dockerfile +9 -0
  8. data/Gemfile +6 -0
  9. data/Gemfile.lock +72 -0
  10. data/README.md +303 -82
  11. data/Rakefile +5 -0
  12. data/docker-compose.yml +65 -0
  13. data/examples/batch_consumer.rb +4 -2
  14. data/examples/cat_consumer.rb +2 -0
  15. data/examples/producing_consumer.rb +2 -0
  16. data/exe/racecar +37 -14
  17. data/extra/datadog-dashboard.json +1 -0
  18. data/lib/ensure_hash_compact.rb +2 -0
  19. data/lib/generators/racecar/consumer_generator.rb +2 -0
  20. data/lib/generators/racecar/install_generator.rb +2 -0
  21. data/lib/racecar/cli.rb +26 -21
  22. data/lib/racecar/config.rb +80 -4
  23. data/lib/racecar/consumer.rb +51 -6
  24. data/lib/racecar/consumer_set.rb +113 -44
  25. data/lib/racecar/ctl.rb +31 -3
  26. data/lib/racecar/daemon.rb +4 -2
  27. data/lib/racecar/datadog.rb +83 -3
  28. data/lib/racecar/delivery_callback.rb +27 -0
  29. data/lib/racecar/erroneous_state_error.rb +34 -0
  30. data/lib/racecar/heroku.rb +49 -0
  31. data/lib/racecar/instrumenter.rb +4 -7
  32. data/lib/racecar/liveness_probe.rb +78 -0
  33. data/lib/racecar/message.rb +6 -1
  34. data/lib/racecar/message_delivery_error.rb +112 -0
  35. data/lib/racecar/null_instrumenter.rb +2 -0
  36. data/lib/racecar/parallel_runner.rb +110 -0
  37. data/lib/racecar/pause.rb +8 -4
  38. data/lib/racecar/producer.rb +139 -0
  39. data/lib/racecar/rails_config_file_loader.rb +7 -1
  40. data/lib/racecar/rebalance_listener.rb +58 -0
  41. data/lib/racecar/runner.rb +79 -37
  42. data/lib/racecar/version.rb +3 -1
  43. data/lib/racecar.rb +36 -8
  44. data/racecar.gemspec +7 -4
  45. metadata +47 -25
  46. data/.github/workflows/rspec.yml +0 -24
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Racecar
2
4
  class ConsumerSet
3
5
  MAX_POLL_TRIES = 10
@@ -10,53 +12,50 @@ module Racecar
10
12
  @consumers = []
11
13
  @consumer_id_iterator = (0...@config.subscriptions.size).cycle
12
14
 
15
+ @previous_retries = 0
16
+
13
17
  @last_poll_read_nil_message = false
18
+ @paused_tpls = Hash.new { |h, k| h[k] = {} }
14
19
  end
15
20
 
16
- def poll(timeout_ms)
17
- maybe_select_next_consumer
18
- started_at ||= Time.now
19
- try ||= 0
20
- remain ||= timeout_ms
21
-
22
- msg = remain <= 0 ? nil : current.poll(remain)
23
- rescue Rdkafka::RdkafkaError => e
24
- wait_before_retry_ms = 100 * (2**try) # 100ms, 200ms, 400ms, …
25
- try += 1
26
- raise if try >= MAX_POLL_TRIES || remain <= wait_before_retry_ms
27
-
28
- @logger.error "(try #{try}): Error for topic subscription #{current_subscription}: #{e}"
29
-
30
- case e.code
31
- when :max_poll_exceeded, :transport # -147, -195
32
- reset_current_consumer
33
- end
34
-
35
- remain = remaining_time_ms(timeout_ms, started_at)
36
- raise if remain <= wait_before_retry_ms
37
-
38
- sleep wait_before_retry_ms/1000.0
39
- retry
40
- ensure
41
- @last_poll_read_nil_message = true if msg.nil?
21
+ def poll(max_wait_time_ms = @config.max_wait_time_ms)
22
+ batch_poll(max_wait_time_ms, 1).first
42
23
  end
43
24
 
44
- # XXX: messages are not guaranteed to be from the same partition
45
- def batch_poll(timeout_ms)
46
- @batch_started_at = Time.now
47
- @messages = []
48
- while collect_messages_for_batch? do
49
- remain = remaining_time_ms(timeout_ms, @batch_started_at)
50
- break if remain <= 0
51
- msg = poll(remain)
25
+ # batch_poll collects messages until any of the following occurs:
26
+ # - max_wait_time_ms time has passed
27
+ # - max_messages have been collected
28
+ # - a nil message was polled (end of topic, Kafka stalled, etc.)
29
+ #
30
+ # The messages are from a single topic, but potentially from more than one partition.
31
+ #
32
+ # Any errors during polling are retried in an exponential backoff fashion. If an error
33
+ # occurs, but there is no time left for a backoff and retry, it will return the
34
+ # already collected messages and only retry on the next call.
35
+ def batch_poll(max_wait_time_ms = @config.max_wait_time_ms, max_messages = @config.fetch_messages)
36
+ started_at = Time.now
37
+ remain_ms = max_wait_time_ms
38
+ maybe_select_next_consumer
39
+ messages = []
40
+
41
+ while remain_ms > 0 && messages.size < max_messages
42
+ remain_ms = remaining_time_ms(max_wait_time_ms, started_at)
43
+ msg = poll_with_retries(remain_ms)
52
44
  break if msg.nil?
53
- @messages << msg
45
+ messages << msg
54
46
  end
55
- @messages
47
+
48
+ messages
56
49
  end
57
50
 
58
51
  def store_offset(message)
59
52
  current.store_offset(message)
53
+ rescue Rdkafka::RdkafkaError => e
54
+ if e.code == :state # -172
55
+ @logger.warn "Attempted to store_offset, but we're not subscribed to it: #{ErroneousStateError.new(e)}"
56
+ return
57
+ end
58
+ raise e
60
59
  end
61
60
 
62
61
  def commit
@@ -67,11 +66,17 @@ module Racecar
67
66
 
68
67
  def close
69
68
  each_subscribed(&:close)
69
+ @paused_tpls.clear
70
70
  end
71
71
 
72
72
  def current
73
73
  @consumers[@consumer_id_iterator.peek] ||= begin
74
- consumer = Rdkafka::Config.new(rdkafka_config(current_subscription)).consumer
74
+ consumer_config = Rdkafka::Config.new(rdkafka_config(current_subscription))
75
+ listener = RebalanceListener.new(@config.consumer_class, @instrumenter)
76
+ consumer_config.consumer_rebalance_listener = listener
77
+ consumer = consumer_config.consumer
78
+ listener.rdkafka_consumer = consumer
79
+
75
80
  @instrumenter.instrument('join_group') do
76
81
  consumer.subscribe current_subscription.topic
77
82
  end
@@ -97,16 +102,25 @@ module Racecar
97
102
  consumer.pause(filtered_tpl)
98
103
  fake_msg = OpenStruct.new(topic: topic, partition: partition, offset: offset)
99
104
  consumer.seek(fake_msg)
105
+
106
+ @paused_tpls[topic][partition] = [consumer, filtered_tpl]
100
107
  end
101
108
 
102
109
  def resume(topic, partition)
103
110
  consumer, filtered_tpl = find_consumer_by(topic, partition)
111
+
112
+ if !consumer && @paused_tpls[topic][partition]
113
+ consumer, filtered_tpl = @paused_tpls[topic][partition]
114
+ end
115
+
104
116
  if !consumer
105
117
  @logger.info "Attempted to resume #{topic}/#{partition}, but we're not subscribed to it"
106
118
  return
107
119
  end
108
120
 
109
121
  consumer.resume(filtered_tpl)
122
+ @paused_tpls[topic].delete(partition)
123
+ @paused_tpls.delete(topic) if @paused_tpls[topic].empty?
110
124
  end
111
125
 
112
126
  alias :each :each_subscribed
@@ -123,6 +137,55 @@ module Racecar
123
137
 
124
138
  private
125
139
 
140
+ # polls a single message from the current consumer, retrying errors with exponential
141
+ # backoff. The sleep time is capped by max_wait_time_ms. If there's enough time budget
142
+ # left, it will retry before returning. If there isn't, the retry will only occur on
143
+ # the next call. It tries up to MAX_POLL_TRIES before passing on the exception.
144
+ def poll_with_retries(max_wait_time_ms)
145
+ try ||= @previous_retries
146
+ @previous_retries = 0
147
+ started_at ||= Time.now
148
+ remain_ms = remaining_time_ms(max_wait_time_ms, started_at)
149
+
150
+ wait_ms = try == 0 ? 0 : 50 * (2**try) # 0ms, 100ms, 200ms, 400ms, …
151
+ if wait_ms >= max_wait_time_ms && remain_ms > 1
152
+ @logger.debug "Capping #{wait_ms}ms to #{max_wait_time_ms-1}ms."
153
+ sleep (max_wait_time_ms-1)/1000.0
154
+ remain_ms = 1
155
+ elsif try == 0 && remain_ms == 0
156
+ @logger.debug "No time remains for polling messages. Will try on next call."
157
+ return nil
158
+ elsif wait_ms >= remain_ms
159
+ @logger.warn "Only #{remain_ms}ms left, but want to wait for #{wait_ms}ms before poll. Will retry on next call."
160
+ @previous_retries = try
161
+ return nil
162
+ elsif wait_ms > 0
163
+ sleep wait_ms/1000.0
164
+ remain_ms -= wait_ms
165
+ end
166
+
167
+ poll_current_consumer(remain_ms)
168
+ rescue Rdkafka::RdkafkaError => e
169
+ try += 1
170
+ @instrumenter.instrument("poll_retry", try: try, rdkafka_time_limit: remain_ms, exception: e)
171
+ @logger.error "(try #{try}/#{MAX_POLL_TRIES}): Error for topic subscription #{current_subscription}: #{e}"
172
+ raise if try >= MAX_POLL_TRIES
173
+ retry
174
+ end
175
+
176
+ # polls a message for the current consumer, handling any API edge cases.
177
+ def poll_current_consumer(max_wait_time_ms)
178
+ msg = current.poll(max_wait_time_ms)
179
+ rescue Rdkafka::RdkafkaError => e
180
+ case e.code
181
+ when :max_poll_exceeded, :transport, :not_coordinator # -147, -195, 16
182
+ reset_current_consumer
183
+ end
184
+ raise
185
+ ensure
186
+ @last_poll_read_nil_message = msg.nil?
187
+ end
188
+
126
189
  def find_consumer_by(topic, partition)
127
190
  each do |consumer|
128
191
  tpl = consumer.assignment.to_h
@@ -140,7 +203,12 @@ module Racecar
140
203
  end
141
204
 
142
205
  def reset_current_consumer
143
- @consumers[@consumer_id_iterator.peek] = nil
206
+ current_consumer_id = @consumer_id_iterator.peek
207
+ @logger.info "Resetting consumer with id: #{current_consumer_id}"
208
+
209
+ consumer = @consumers[current_consumer_id]
210
+ consumer.close unless consumer.nil?
211
+ @consumers[current_consumer_id] = nil
144
212
  end
145
213
 
146
214
  def maybe_select_next_consumer
@@ -160,14 +228,14 @@ module Racecar
160
228
  @logger.debug "Nothing to commit."
161
229
  end
162
230
 
163
- def collect_messages_for_batch?
164
- @messages.size < @config.fetch_messages &&
165
- (Time.now - @batch_started_at) < @config.max_wait_time
166
- end
167
-
168
231
  def rdkafka_config(subscription)
169
232
  # https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
170
233
  config = {
234
+ # Manually store offset after messages have been processed successfully
235
+ # to avoid marking failed messages as committed. The call just updates
236
+ # a value within librdkafka and is asynchronously written to proper
237
+ # storage through auto commits.
238
+ "enable.auto.offset.store" => false,
171
239
  "auto.commit.interval.ms" => @config.offset_commit_interval * 1000,
172
240
  "auto.offset.reset" => subscription.start_from_beginning ? "earliest" : "largest",
173
241
  "bootstrap.servers" => @config.brokers.join(","),
@@ -183,7 +251,8 @@ module Racecar
183
251
  "queued.min.messages" => @config.min_message_queue_size,
184
252
  "session.timeout.ms" => @config.session_timeout * 1000,
185
253
  "socket.timeout.ms" => @config.socket_timeout * 1000,
186
- "statistics.interval.ms" => 1000, # 1s is the highest granularity offered
254
+ "statistics.interval.ms" => @config.statistics_interval_ms,
255
+ "partition.assignment.strategy" => @config.partition_assignment_strategy,
187
256
  }
188
257
  config.merge! @config.rdkafka_consumer
189
258
  config.merge! subscription.additional_config
data/lib/racecar/ctl.rb CHANGED
@@ -1,6 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "optparse"
2
4
  require "racecar/rails_config_file_loader"
3
5
  require "racecar/daemon"
6
+ require "racecar/message_delivery_error"
4
7
 
5
8
  module Racecar
6
9
  class Ctl
@@ -29,6 +32,21 @@ module Racecar
29
32
  @command = command
30
33
  end
31
34
 
35
+ def liveness_probe(args)
36
+ require "racecar/liveness_probe"
37
+ parse_options!(args)
38
+
39
+ if ENV["RAILS_ENV"] && File.exist?("config/racecar.yml")
40
+ Racecar.config.load_file("config/racecar.yml", ENV["RAILS_ENV"])
41
+ end
42
+
43
+ if File.exist?("config/racecar.rb")
44
+ require "./config/racecar"
45
+ end
46
+
47
+ Racecar.config.liveness_probe.check_liveness_within_interval!
48
+ end
49
+
32
50
  def status(args)
33
51
  parse_options!(args)
34
52
 
@@ -94,11 +112,17 @@ module Racecar
94
112
  Racecar.config.validate!
95
113
 
96
114
  producer = Rdkafka::Config.new({
97
- "bootstrap.servers": Racecar.config.brokers.join(","),
98
- "client.id": Racecar.config.client_id,
115
+ "bootstrap.servers": Racecar.config.brokers.join(","),
116
+ "client.id": Racecar.config.client_id,
117
+ "message.timeout.ms": Racecar.config.message_timeout * 1000,
99
118
  }.merge(Racecar.config.rdkafka_producer)).producer
100
119
 
101
- producer.produce(payload: message.value, key: message.key, topic: message.topic).wait
120
+ handle = producer.produce(payload: message.value, key: message.key, topic: message.topic)
121
+ begin
122
+ handle.wait(max_wait_timeout: Racecar.config.message_timeout)
123
+ rescue Rdkafka::RdkafkaError => e
124
+ raise MessageDeliveryError.new(e, handle)
125
+ end
102
126
 
103
127
  $stderr.puts "=> Delivered message to Kafka cluster"
104
128
  end
@@ -116,5 +140,9 @@ module Racecar
116
140
 
117
141
  parser.parse!(args)
118
142
  end
143
+
144
+ def config
145
+ Racecar.config
146
+ end
119
147
  end
120
148
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Racecar
2
4
  class Daemon
3
5
  attr_reader :pidfile
@@ -52,7 +54,7 @@ module Racecar
52
54
  end
53
55
 
54
56
  def pid
55
- if File.exists?(pidfile)
57
+ if File.exist?(pidfile)
56
58
  File.read(pidfile).to_i
57
59
  else
58
60
  nil
@@ -87,7 +89,7 @@ module Racecar
87
89
  end
88
90
 
89
91
  at_exit do
90
- File.delete(pidfile) if File.exists?(pidfile)
92
+ File.delete(pidfile) if File.exist?(pidfile)
91
93
  end
92
94
  rescue Errno::EEXIST
93
95
  check_pid
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  begin
2
4
  require "datadog/statsd"
3
5
  rescue LoadError
@@ -61,10 +63,14 @@ module Racecar
61
63
  clear
62
64
  end
63
65
 
66
+ def close
67
+ @statsd&.close
68
+ end
69
+
64
70
  private
65
71
 
66
72
  def clear
67
- @statsd && @statsd.close
73
+ close
68
74
  @statsd = nil
69
75
  end
70
76
  end
@@ -73,8 +79,8 @@ module Racecar
73
79
  private
74
80
 
75
81
  %w[increment histogram count timing gauge].each do |type|
76
- define_method(type) do |*args|
77
- emit(type, *args)
82
+ define_method(type) do |*args, **kwargs|
83
+ emit(type, *args, **kwargs)
78
84
  end
79
85
  end
80
86
 
@@ -155,6 +161,15 @@ module Racecar
155
161
  end
156
162
  end
157
163
 
164
+ def poll_retry(event)
165
+ tags = {
166
+ client: event.payload.fetch(:client_id),
167
+ group_id: event.payload.fetch(:group_id),
168
+ }
169
+ rdkafka_error_code = event.payload.fetch(:exception).code.to_s.gsub(/\W/, '')
170
+ increment("consumer.poll.rdkafka_error.#{rdkafka_error_code}", tags: tags)
171
+ end
172
+
158
173
  def main_loop(event)
159
174
  tags = {
160
175
  client: event.payload.fetch(:client_id),
@@ -196,6 +211,10 @@ module Racecar
196
211
  topic: topic,
197
212
  }
198
213
 
214
+ if event.payload.key?(:exception)
215
+ increment("producer.produce.errors", tags: tags)
216
+ end
217
+
199
218
  # This gets us the write rate.
200
219
  increment("producer.produce.messages", tags: tags.merge(topic: topic))
201
220
 
@@ -230,6 +249,67 @@ module Racecar
230
249
  increment("producer.ack.messages", tags: tags)
231
250
  end
232
251
 
252
+ def produce_delivery_error(event)
253
+ tags = {
254
+ client: event.payload.fetch(:client_id),
255
+ }
256
+
257
+ increment("producer.produce.delivery.errors", tags: tags)
258
+ end
259
+
260
+ def produce_async(event)
261
+ client = event.payload.fetch(:client_id)
262
+ topic = event.payload.fetch(:topic)
263
+ message_size = event.payload.fetch(:message_size)
264
+ buffer_size = event.payload.fetch(:buffer_size)
265
+
266
+ tags = {
267
+ client: client,
268
+ topic: topic,
269
+ }
270
+
271
+ if event.payload.key?(:exception)
272
+ increment("producer.produce.errors", tags: tags)
273
+ end
274
+
275
+ # This gets us the write rate.
276
+ increment("producer.produce.messages", tags: tags.merge(topic: topic))
277
+
278
+ # Information about typical/average/95p message size.
279
+ histogram("producer.produce.message_size", message_size, tags: tags.merge(topic: topic))
280
+
281
+ # Aggregate message size.
282
+ count("producer.produce.message_size.sum", message_size, tags: tags.merge(topic: topic))
283
+
284
+ # This gets us the avg/max buffer size per producer.
285
+ histogram("producer.buffer.size", buffer_size, tags: tags)
286
+ end
287
+
288
+ def produce_sync(event)
289
+ client = event.payload.fetch(:client_id)
290
+ topic = event.payload.fetch(:topic)
291
+ message_size = event.payload.fetch(:message_size)
292
+
293
+ tags = {
294
+ client: client,
295
+ topic: topic,
296
+ }
297
+
298
+ if event.payload.key?(:exception)
299
+ increment("producer.produce.errors", tags: tags)
300
+ end
301
+
302
+
303
+ # This gets us the write rate.
304
+ increment("producer.produce.messages", tags: tags.merge(topic: topic))
305
+
306
+ # Information about typical/average/95p message size.
307
+ histogram("producer.produce.message_size", message_size, tags: tags.merge(topic: topic))
308
+
309
+ # Aggregate message size.
310
+ count("producer.produce.message_size.sum", message_size, tags: tags.merge(topic: topic))
311
+ end
312
+
233
313
  attach_to "racecar"
234
314
  end
235
315
  end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
module Racecar
  # Callable that translates a producer delivery report into an
  # instrumentation event: "acknowledged_message" when the report carries no
  # error, "produce_delivery_error" otherwise.
  class DeliveryCallback
    attr_reader :instrumenter

    # @param instrumenter [#instrument] receiver of the emitted events
    def initialize(instrumenter:)
      @instrumenter = instrumenter
    end

    # Emits exactly one event for the given report.
    #
    # @param delivery_report [#error, #offset, #partition]
    # @return whatever the instrumenter's `instrument` call returns
    def call(delivery_report)
      failure = delivery_report.error

      # A nil or zero error code means the message was delivered.
      unless failure.to_i.zero?
        details = { partition: delivery_report.partition, exception: failure }
        return instrumenter.instrument("produce_delivery_error", details)
      end

      details = { offset: delivery_report.offset, partition: delivery_report.partition }
      instrumenter.instrument("acknowledged_message", details)
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # `rd_kafka_offsets_store()` (et.al) returns an error for any
2
+ # partition that is not currently assigned (through `rd_kafka_*assign()`).
3
+ # This prevents a race condition where an application would store offsets
4
+ # after the assigned partitions had been revoked (which resets the stored
5
+ # offset), that could cause these old stored offsets to be committed later
6
+ # when the same partitions were assigned to this consumer again - effectively
7
+ # overwriting any committed offsets by any consumers that were assigned the
8
+ # same partitions previously. This would typically result in the offsets
9
+ # rewinding and messages to be reprocessed.
10
+ # As an extra effort to avoid this situation the stored offset is now
11
+ # also reset when partitions are assigned (through `rd_kafka_*assign()`).
12
+ module Racecar
13
+ class ErroneousStateError < StandardError
14
+ def initialize(rdkafka_error)
15
+ raise rdkafka_error unless rdkafka_error.is_a?(Rdkafka::RdkafkaError)
16
+
17
+ @rdkafka_error = rdkafka_error
18
+ end
19
+
20
+ attr_reader :rdkafka_error
21
+
22
+ def code
23
+ @rdkafka_error.code
24
+ end
25
+
26
+ def to_s
27
+ <<~EOM
28
+ Partition is no longer assigned to this consumer and the offset could not be stored for commit.
29
+ #{@rdkafka_error.to_s}
30
+ EOM
31
+ end
32
+
33
+ end
34
+ end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'tempfile'
4
+
5
+ # Heroku Kafka addon provides 4 ENVs to connect to their Kafka Broker
6
+ # KAFKA_TRUSTED_CERT, KAFKA_CLIENT_CERT, KAFKA_CLIENT_CERT_KEY, KAFKA_URL
7
+ # This will work only if the Heroku Kafka add-on is aliased to "KAFKA"
8
+
9
+ $stderr.puts "=> Loading configuration from Heroku Kafka ENVs"
10
+
11
module Racecar
  # Configures Racecar from the environment variables provided by the Heroku
  # Kafka add-on (KAFKA_URL, KAFKA_TRUSTED_CERT, KAFKA_CLIENT_CERT and
  # KAFKA_CLIENT_CERT_KEY).
  module Heroku
    REQUIRED_ENV_VARS = %w[
      KAFKA_URL
      KAFKA_TRUSTED_CERT
      KAFKA_CLIENT_CERT
      KAFKA_CLIENT_CERT_KEY
    ].freeze

    # Validates that all required variables are set, writes the certificates
    # to temporary `.pem` files and points the Racecar SSL settings at them.
    #
    # Prints an error to stderr and exits the process with status 1 when a
    # required variable is missing.
    def self.load_configuration!
      REQUIRED_ENV_VARS.each do |env_name|
        next unless ENV[env_name].nil?

        $stderr.puts "Error: ENV #{env_name} is not set"
        exit 1
      end

      # Tempfile unlinks its file when the object is garbage-collected, so the
      # objects must stay referenced for as long as the paths are in use.
      # Captured as a local so the lambda works regardless of the block's self.
      pem_files = (@pem_files ||= [])

      Racecar.configure do |config|
        ca_cert = ENV["KAFKA_TRUSTED_CERT"]
        client_cert = ENV["KAFKA_CLIENT_CERT"]
        client_cert_key = ENV["KAFKA_CLIENT_CERT_KEY"]

        tmp_file_path = lambda do |data|
          tempfile = Tempfile.new(['', '.pem'])
          tempfile << data
          tempfile.close
          pem_files << tempfile
          tempfile.path
        end

        config.security_protocol = :ssl
        config.ssl_ca_location = tmp_file_path.call(ca_cert)
        config.ssl_certificate_location = tmp_file_path.call(client_cert)
        config.ssl_key_location = tmp_file_path.call(client_cert_key)

        # KAFKA_URL is a comma-separated list of kafka+ssl:// URLs.
        config.brokers = ENV["KAFKA_URL"].to_s.gsub('kafka+ssl://', '').split(',')
      end
    end
  end
end
48
+
49
+ Racecar::Heroku.load_configuration!
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Racecar
2
4
  ##
3
5
  # Common API for instrumentation to standardize
@@ -7,14 +9,9 @@ module Racecar
7
9
  NAMESPACE = "racecar"
8
10
  attr_reader :backend
9
11
 
10
- def initialize(default_payload = {})
12
+ def initialize(backend:, default_payload: {})
13
+ @backend = backend
11
14
  @default_payload = default_payload
12
-
13
- @backend = if defined?(ActiveSupport::Notifications)
14
- ActiveSupport::Notifications
15
- else
16
- NullInstrumenter
17
- end
18
15
  end
19
16
 
20
17
  def instrument(event_name, payload = {}, &block)
@@ -0,0 +1,78 @@
1
+ require "fileutils"
2
+
3
+ module Racecar
4
+ class LivenessProbe
5
+ def initialize(message_bus, file_path, max_interval)
6
+ @message_bus = message_bus
7
+ @file_path = file_path
8
+ @max_interval = max_interval
9
+ @subscribers = []
10
+ end
11
+
12
+ attr_reader :message_bus, :file_path, :max_interval, :subscribers
13
+ private :message_bus, :file_path, :max_interval, :subscribers
14
+
15
+ def check_liveness_within_interval!
16
+ unless liveness_event_within_interval?
17
+ $stderr.puts "Racecar healthcheck failed: No liveness within interval #{max_interval}s. Last liveness at #{last_liveness_event_at}, #{elapsed_since_liveness_event} seconds ago."
18
+ Process.exit(1)
19
+ end
20
+ end
21
+
22
+ def liveness_event_within_interval?
23
+ elapsed_since_liveness_event < max_interval
24
+ rescue Errno::ENOENT
25
+ $stderr.puts "Racecar healthcheck failed: Liveness file not found `#{file_path}`"
26
+ Process.exit(1)
27
+ end
28
+
29
+ def install
30
+ unless file_path && file_writeable?
31
+ raise(
32
+ "Liveness probe configuration error: `liveness_probe_file_path` must be set to a writable file path.\n" \
33
+ " Set `RACECAR_LIVENESS_PROBE_FILE_PATH` and `RACECAR_LIVENESS_MAX_INTERVAL` environment variables."
34
+ )
35
+ end
36
+
37
+ subscribers << message_bus.subscribe("start_main_loop.racecar") do
38
+ touch_liveness_file
39
+ end
40
+
41
+ subscribers = message_bus.subscribe("shut_down.racecar") do
42
+ delete_liveness_file
43
+ end
44
+
45
+ nil
46
+ end
47
+
48
+ def uninstall
49
+ subscribers.each { |s| message_bus.unsubscribe(s) }
50
+ end
51
+
52
+ private
53
+
54
+ def elapsed_since_liveness_event
55
+ Time.now - last_liveness_event_at
56
+ end
57
+
58
+ def last_liveness_event_at
59
+ File.mtime(file_path)
60
+ end
61
+
62
+ def touch_liveness_file
63
+ FileUtils.touch(file_path)
64
+ end
65
+
66
+ def delete_liveness_file
67
+ FileUtils.rm_rf(file_path)
68
+ end
69
+
70
+ def file_writeable?
71
+ File.write(file_path, "")
72
+ File.unlink(file_path)
73
+ true
74
+ rescue
75
+ false
76
+ end
77
+ end
78
+ end
@@ -1,11 +1,16 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "forwardable"
2
4
 
3
5
  module Racecar
4
6
  class Message
5
7
  extend Forwardable
6
8
 
7
- def initialize(rdkafka_message)
9
+ attr_reader :retries_count
10
+
11
+ def initialize(rdkafka_message, retries_count: nil)
8
12
  @rdkafka_message = rdkafka_message
13
+ @retries_count = retries_count
9
14
  end
10
15
 
11
16
  def_delegators :@rdkafka_message, :topic, :partition, :offset, :key, :headers