racecar 2.0.0 → 2.10.0.beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +17 -0
- data/.github/workflows/ci.yml +46 -0
- data/.github/workflows/publish.yml +12 -0
- data/.gitignore +1 -2
- data/CHANGELOG.md +83 -1
- data/Dockerfile +9 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +72 -0
- data/README.md +303 -82
- data/Rakefile +5 -0
- data/docker-compose.yml +65 -0
- data/examples/batch_consumer.rb +4 -2
- data/examples/cat_consumer.rb +2 -0
- data/examples/producing_consumer.rb +2 -0
- data/exe/racecar +37 -14
- data/extra/datadog-dashboard.json +1 -0
- data/lib/ensure_hash_compact.rb +2 -0
- data/lib/generators/racecar/consumer_generator.rb +2 -0
- data/lib/generators/racecar/install_generator.rb +2 -0
- data/lib/racecar/cli.rb +26 -21
- data/lib/racecar/config.rb +80 -4
- data/lib/racecar/consumer.rb +51 -6
- data/lib/racecar/consumer_set.rb +113 -44
- data/lib/racecar/ctl.rb +31 -3
- data/lib/racecar/daemon.rb +4 -2
- data/lib/racecar/datadog.rb +83 -3
- data/lib/racecar/delivery_callback.rb +27 -0
- data/lib/racecar/erroneous_state_error.rb +34 -0
- data/lib/racecar/heroku.rb +49 -0
- data/lib/racecar/instrumenter.rb +4 -7
- data/lib/racecar/liveness_probe.rb +78 -0
- data/lib/racecar/message.rb +6 -1
- data/lib/racecar/message_delivery_error.rb +112 -0
- data/lib/racecar/null_instrumenter.rb +2 -0
- data/lib/racecar/parallel_runner.rb +110 -0
- data/lib/racecar/pause.rb +8 -4
- data/lib/racecar/producer.rb +139 -0
- data/lib/racecar/rails_config_file_loader.rb +7 -1
- data/lib/racecar/rebalance_listener.rb +58 -0
- data/lib/racecar/runner.rb +79 -37
- data/lib/racecar/version.rb +3 -1
- data/lib/racecar.rb +36 -8
- data/racecar.gemspec +7 -4
- metadata +47 -25
- data/.github/workflows/rspec.yml +0 -24
data/lib/racecar/consumer_set.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Racecar
|
2
4
|
class ConsumerSet
|
3
5
|
MAX_POLL_TRIES = 10
|
@@ -10,53 +12,50 @@ module Racecar
|
|
10
12
|
@consumers = []
|
11
13
|
@consumer_id_iterator = (0...@config.subscriptions.size).cycle
|
12
14
|
|
15
|
+
@previous_retries = 0
|
16
|
+
|
13
17
|
@last_poll_read_nil_message = false
|
18
|
+
@paused_tpls = Hash.new { |h, k| h[k] = {} }
|
14
19
|
end
|
15
20
|
|
16
|
-
def poll(
|
17
|
-
|
18
|
-
started_at ||= Time.now
|
19
|
-
try ||= 0
|
20
|
-
remain ||= timeout_ms
|
21
|
-
|
22
|
-
msg = remain <= 0 ? nil : current.poll(remain)
|
23
|
-
rescue Rdkafka::RdkafkaError => e
|
24
|
-
wait_before_retry_ms = 100 * (2**try) # 100ms, 200ms, 400ms, …
|
25
|
-
try += 1
|
26
|
-
raise if try >= MAX_POLL_TRIES || remain <= wait_before_retry_ms
|
27
|
-
|
28
|
-
@logger.error "(try #{try}): Error for topic subscription #{current_subscription}: #{e}"
|
29
|
-
|
30
|
-
case e.code
|
31
|
-
when :max_poll_exceeded, :transport # -147, -195
|
32
|
-
reset_current_consumer
|
33
|
-
end
|
34
|
-
|
35
|
-
remain = remaining_time_ms(timeout_ms, started_at)
|
36
|
-
raise if remain <= wait_before_retry_ms
|
37
|
-
|
38
|
-
sleep wait_before_retry_ms/1000.0
|
39
|
-
retry
|
40
|
-
ensure
|
41
|
-
@last_poll_read_nil_message = true if msg.nil?
|
21
|
+
def poll(max_wait_time_ms = @config.max_wait_time_ms)
|
22
|
+
batch_poll(max_wait_time_ms, 1).first
|
42
23
|
end
|
43
24
|
|
44
|
-
#
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
25
|
+
# batch_poll collects messages until any of the following occurs:
|
26
|
+
# - max_wait_time_ms time has passed
|
27
|
+
# - max_messages have been collected
|
28
|
+
# - a nil message was polled (end of topic, Kafka stalled, etc.)
|
29
|
+
#
|
30
|
+
# The messages are from a single topic, but potentially from more than one partition.
|
31
|
+
#
|
32
|
+
# Any errors during polling are retried in an exponential backoff fashion. If an error
|
33
|
+
# occurs, but there is no time left for a backoff and retry, it will return the
|
34
|
+
# already collected messages and only retry on the next call.
|
35
|
+
def batch_poll(max_wait_time_ms = @config.max_wait_time_ms, max_messages = @config.fetch_messages)
|
36
|
+
started_at = Time.now
|
37
|
+
remain_ms = max_wait_time_ms
|
38
|
+
maybe_select_next_consumer
|
39
|
+
messages = []
|
40
|
+
|
41
|
+
while remain_ms > 0 && messages.size < max_messages
|
42
|
+
remain_ms = remaining_time_ms(max_wait_time_ms, started_at)
|
43
|
+
msg = poll_with_retries(remain_ms)
|
52
44
|
break if msg.nil?
|
53
|
-
|
45
|
+
messages << msg
|
54
46
|
end
|
55
|
-
|
47
|
+
|
48
|
+
messages
|
56
49
|
end
|
57
50
|
|
58
51
|
def store_offset(message)
|
59
52
|
current.store_offset(message)
|
53
|
+
rescue Rdkafka::RdkafkaError => e
|
54
|
+
if e.code == :state # -172
|
55
|
+
@logger.warn "Attempted to store_offset, but we're not subscribed to it: #{ErroneousStateError.new(e)}"
|
56
|
+
return
|
57
|
+
end
|
58
|
+
raise e
|
60
59
|
end
|
61
60
|
|
62
61
|
def commit
|
@@ -67,11 +66,17 @@ module Racecar
|
|
67
66
|
|
68
67
|
def close
|
69
68
|
each_subscribed(&:close)
|
69
|
+
@paused_tpls.clear
|
70
70
|
end
|
71
71
|
|
72
72
|
def current
|
73
73
|
@consumers[@consumer_id_iterator.peek] ||= begin
|
74
|
-
|
74
|
+
consumer_config = Rdkafka::Config.new(rdkafka_config(current_subscription))
|
75
|
+
listener = RebalanceListener.new(@config.consumer_class, @instrumenter)
|
76
|
+
consumer_config.consumer_rebalance_listener = listener
|
77
|
+
consumer = consumer_config.consumer
|
78
|
+
listener.rdkafka_consumer = consumer
|
79
|
+
|
75
80
|
@instrumenter.instrument('join_group') do
|
76
81
|
consumer.subscribe current_subscription.topic
|
77
82
|
end
|
@@ -97,16 +102,25 @@ module Racecar
|
|
97
102
|
consumer.pause(filtered_tpl)
|
98
103
|
fake_msg = OpenStruct.new(topic: topic, partition: partition, offset: offset)
|
99
104
|
consumer.seek(fake_msg)
|
105
|
+
|
106
|
+
@paused_tpls[topic][partition] = [consumer, filtered_tpl]
|
100
107
|
end
|
101
108
|
|
102
109
|
def resume(topic, partition)
|
103
110
|
consumer, filtered_tpl = find_consumer_by(topic, partition)
|
111
|
+
|
112
|
+
if !consumer && @paused_tpls[topic][partition]
|
113
|
+
consumer, filtered_tpl = @paused_tpls[topic][partition]
|
114
|
+
end
|
115
|
+
|
104
116
|
if !consumer
|
105
117
|
@logger.info "Attempted to resume #{topic}/#{partition}, but we're not subscribed to it"
|
106
118
|
return
|
107
119
|
end
|
108
120
|
|
109
121
|
consumer.resume(filtered_tpl)
|
122
|
+
@paused_tpls[topic].delete(partition)
|
123
|
+
@paused_tpls.delete(topic) if @paused_tpls[topic].empty?
|
110
124
|
end
|
111
125
|
|
112
126
|
alias :each :each_subscribed
|
@@ -123,6 +137,55 @@ module Racecar
|
|
123
137
|
|
124
138
|
private
|
125
139
|
|
140
|
+
# polls a single message from the current consumer, retrying errors with exponential
|
141
|
+
# backoff. The sleep time is capped by max_wait_time_ms. If there's enough time budget
|
142
|
+
# left, it will retry before returning. If there isn't, the retry will only occur on
|
143
|
+
# the next call. It tries up to MAX_POLL_TRIES before passing on the exception.
|
144
|
+
def poll_with_retries(max_wait_time_ms)
|
145
|
+
try ||= @previous_retries
|
146
|
+
@previous_retries = 0
|
147
|
+
started_at ||= Time.now
|
148
|
+
remain_ms = remaining_time_ms(max_wait_time_ms, started_at)
|
149
|
+
|
150
|
+
wait_ms = try == 0 ? 0 : 50 * (2**try) # 0ms, 100ms, 200ms, 400ms, …
|
151
|
+
if wait_ms >= max_wait_time_ms && remain_ms > 1
|
152
|
+
@logger.debug "Capping #{wait_ms}ms to #{max_wait_time_ms-1}ms."
|
153
|
+
sleep (max_wait_time_ms-1)/1000.0
|
154
|
+
remain_ms = 1
|
155
|
+
elsif try == 0 && remain_ms == 0
|
156
|
+
@logger.debug "No time remains for polling messages. Will try on next call."
|
157
|
+
return nil
|
158
|
+
elsif wait_ms >= remain_ms
|
159
|
+
@logger.warn "Only #{remain_ms}ms left, but want to wait for #{wait_ms}ms before poll. Will retry on next call."
|
160
|
+
@previous_retries = try
|
161
|
+
return nil
|
162
|
+
elsif wait_ms > 0
|
163
|
+
sleep wait_ms/1000.0
|
164
|
+
remain_ms -= wait_ms
|
165
|
+
end
|
166
|
+
|
167
|
+
poll_current_consumer(remain_ms)
|
168
|
+
rescue Rdkafka::RdkafkaError => e
|
169
|
+
try += 1
|
170
|
+
@instrumenter.instrument("poll_retry", try: try, rdkafka_time_limit: remain_ms, exception: e)
|
171
|
+
@logger.error "(try #{try}/#{MAX_POLL_TRIES}): Error for topic subscription #{current_subscription}: #{e}"
|
172
|
+
raise if try >= MAX_POLL_TRIES
|
173
|
+
retry
|
174
|
+
end
|
175
|
+
|
176
|
+
# polls a message for the current consumer, handling any API edge cases.
|
177
|
+
def poll_current_consumer(max_wait_time_ms)
|
178
|
+
msg = current.poll(max_wait_time_ms)
|
179
|
+
rescue Rdkafka::RdkafkaError => e
|
180
|
+
case e.code
|
181
|
+
when :max_poll_exceeded, :transport, :not_coordinator # -147, -195, 16
|
182
|
+
reset_current_consumer
|
183
|
+
end
|
184
|
+
raise
|
185
|
+
ensure
|
186
|
+
@last_poll_read_nil_message = msg.nil?
|
187
|
+
end
|
188
|
+
|
126
189
|
def find_consumer_by(topic, partition)
|
127
190
|
each do |consumer|
|
128
191
|
tpl = consumer.assignment.to_h
|
@@ -140,7 +203,12 @@ module Racecar
|
|
140
203
|
end
|
141
204
|
|
142
205
|
def reset_current_consumer
|
143
|
-
@
|
206
|
+
current_consumer_id = @consumer_id_iterator.peek
|
207
|
+
@logger.info "Resetting consumer with id: #{current_consumer_id}"
|
208
|
+
|
209
|
+
consumer = @consumers[current_consumer_id]
|
210
|
+
consumer.close unless consumer.nil?
|
211
|
+
@consumers[current_consumer_id] = nil
|
144
212
|
end
|
145
213
|
|
146
214
|
def maybe_select_next_consumer
|
@@ -160,14 +228,14 @@ module Racecar
|
|
160
228
|
@logger.debug "Nothing to commit."
|
161
229
|
end
|
162
230
|
|
163
|
-
def collect_messages_for_batch?
|
164
|
-
@messages.size < @config.fetch_messages &&
|
165
|
-
(Time.now - @batch_started_at) < @config.max_wait_time
|
166
|
-
end
|
167
|
-
|
168
231
|
def rdkafka_config(subscription)
|
169
232
|
# https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
|
170
233
|
config = {
|
234
|
+
# Manually store offset after messages have been processed successfully
|
235
|
+
# to avoid marking failed messages as committed. The call just updates
|
236
|
+
# a value within librdkafka and is asynchronously written to proper
|
237
|
+
# storage through auto commits.
|
238
|
+
"enable.auto.offset.store" => false,
|
171
239
|
"auto.commit.interval.ms" => @config.offset_commit_interval * 1000,
|
172
240
|
"auto.offset.reset" => subscription.start_from_beginning ? "earliest" : "largest",
|
173
241
|
"bootstrap.servers" => @config.brokers.join(","),
|
@@ -183,7 +251,8 @@ module Racecar
|
|
183
251
|
"queued.min.messages" => @config.min_message_queue_size,
|
184
252
|
"session.timeout.ms" => @config.session_timeout * 1000,
|
185
253
|
"socket.timeout.ms" => @config.socket_timeout * 1000,
|
186
|
-
"statistics.interval.ms" =>
|
254
|
+
"statistics.interval.ms" => @config.statistics_interval_ms,
|
255
|
+
"partition.assignment.strategy" => @config.partition_assignment_strategy,
|
187
256
|
}
|
188
257
|
config.merge! @config.rdkafka_consumer
|
189
258
|
config.merge! subscription.additional_config
|
data/lib/racecar/ctl.rb
CHANGED
@@ -1,6 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "optparse"
|
2
4
|
require "racecar/rails_config_file_loader"
|
3
5
|
require "racecar/daemon"
|
6
|
+
require "racecar/message_delivery_error"
|
4
7
|
|
5
8
|
module Racecar
|
6
9
|
class Ctl
|
@@ -29,6 +32,21 @@ module Racecar
|
|
29
32
|
@command = command
|
30
33
|
end
|
31
34
|
|
35
|
+
def liveness_probe(args)
|
36
|
+
require "racecar/liveness_probe"
|
37
|
+
parse_options!(args)
|
38
|
+
|
39
|
+
if ENV["RAILS_ENV"] && File.exist?("config/racecar.yml")
|
40
|
+
Racecar.config.load_file("config/racecar.yml", ENV["RAILS_ENV"])
|
41
|
+
end
|
42
|
+
|
43
|
+
if File.exist?("config/racecar.rb")
|
44
|
+
require "./config/racecar"
|
45
|
+
end
|
46
|
+
|
47
|
+
Racecar.config.liveness_probe.check_liveness_within_interval!
|
48
|
+
end
|
49
|
+
|
32
50
|
def status(args)
|
33
51
|
parse_options!(args)
|
34
52
|
|
@@ -94,11 +112,17 @@ module Racecar
|
|
94
112
|
Racecar.config.validate!
|
95
113
|
|
96
114
|
producer = Rdkafka::Config.new({
|
97
|
-
"bootstrap.servers":
|
98
|
-
"client.id":
|
115
|
+
"bootstrap.servers": Racecar.config.brokers.join(","),
|
116
|
+
"client.id": Racecar.config.client_id,
|
117
|
+
"message.timeout.ms": Racecar.config.message_timeout * 1000,
|
99
118
|
}.merge(Racecar.config.rdkafka_producer)).producer
|
100
119
|
|
101
|
-
producer.produce(payload: message.value, key: message.key, topic: message.topic)
|
120
|
+
handle = producer.produce(payload: message.value, key: message.key, topic: message.topic)
|
121
|
+
begin
|
122
|
+
handle.wait(max_wait_timeout: Racecar.config.message_timeout)
|
123
|
+
rescue Rdkafka::RdkafkaError => e
|
124
|
+
raise MessageDeliveryError.new(e, handle)
|
125
|
+
end
|
102
126
|
|
103
127
|
$stderr.puts "=> Delivered message to Kafka cluster"
|
104
128
|
end
|
@@ -116,5 +140,9 @@ module Racecar
|
|
116
140
|
|
117
141
|
parser.parse!(args)
|
118
142
|
end
|
143
|
+
|
144
|
+
def config
|
145
|
+
Racecar.config
|
146
|
+
end
|
119
147
|
end
|
120
148
|
end
|
data/lib/racecar/daemon.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Racecar
|
2
4
|
class Daemon
|
3
5
|
attr_reader :pidfile
|
@@ -52,7 +54,7 @@ module Racecar
|
|
52
54
|
end
|
53
55
|
|
54
56
|
def pid
|
55
|
-
if File.
|
57
|
+
if File.exist?(pidfile)
|
56
58
|
File.read(pidfile).to_i
|
57
59
|
else
|
58
60
|
nil
|
@@ -87,7 +89,7 @@ module Racecar
|
|
87
89
|
end
|
88
90
|
|
89
91
|
at_exit do
|
90
|
-
File.delete(pidfile) if File.
|
92
|
+
File.delete(pidfile) if File.exist?(pidfile)
|
91
93
|
end
|
92
94
|
rescue Errno::EEXIST
|
93
95
|
check_pid
|
data/lib/racecar/datadog.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
begin
|
2
4
|
require "datadog/statsd"
|
3
5
|
rescue LoadError
|
@@ -61,10 +63,14 @@ module Racecar
|
|
61
63
|
clear
|
62
64
|
end
|
63
65
|
|
66
|
+
def close
|
67
|
+
@statsd&.close
|
68
|
+
end
|
69
|
+
|
64
70
|
private
|
65
71
|
|
66
72
|
def clear
|
67
|
-
|
73
|
+
close
|
68
74
|
@statsd = nil
|
69
75
|
end
|
70
76
|
end
|
@@ -73,8 +79,8 @@ module Racecar
|
|
73
79
|
private
|
74
80
|
|
75
81
|
%w[increment histogram count timing gauge].each do |type|
|
76
|
-
define_method(type) do |*args|
|
77
|
-
emit(type, *args)
|
82
|
+
define_method(type) do |*args, **kwargs|
|
83
|
+
emit(type, *args, **kwargs)
|
78
84
|
end
|
79
85
|
end
|
80
86
|
|
@@ -155,6 +161,15 @@ module Racecar
|
|
155
161
|
end
|
156
162
|
end
|
157
163
|
|
164
|
+
def poll_retry(event)
|
165
|
+
tags = {
|
166
|
+
client: event.payload.fetch(:client_id),
|
167
|
+
group_id: event.payload.fetch(:group_id),
|
168
|
+
}
|
169
|
+
rdkafka_error_code = event.payload.fetch(:exception).code.to_s.gsub(/\W/, '')
|
170
|
+
increment("consumer.poll.rdkafka_error.#{rdkafka_error_code}", tags: tags)
|
171
|
+
end
|
172
|
+
|
158
173
|
def main_loop(event)
|
159
174
|
tags = {
|
160
175
|
client: event.payload.fetch(:client_id),
|
@@ -196,6 +211,10 @@ module Racecar
|
|
196
211
|
topic: topic,
|
197
212
|
}
|
198
213
|
|
214
|
+
if event.payload.key?(:exception)
|
215
|
+
increment("producer.produce.errors", tags: tags)
|
216
|
+
end
|
217
|
+
|
199
218
|
# This gets us the write rate.
|
200
219
|
increment("producer.produce.messages", tags: tags.merge(topic: topic))
|
201
220
|
|
@@ -230,6 +249,67 @@ module Racecar
|
|
230
249
|
increment("producer.ack.messages", tags: tags)
|
231
250
|
end
|
232
251
|
|
252
|
+
def produce_delivery_error(event)
|
253
|
+
tags = {
|
254
|
+
client: event.payload.fetch(:client_id),
|
255
|
+
}
|
256
|
+
|
257
|
+
increment("producer.produce.delivery.errors", tags: tags)
|
258
|
+
end
|
259
|
+
|
260
|
+
def produce_async(event)
|
261
|
+
client = event.payload.fetch(:client_id)
|
262
|
+
topic = event.payload.fetch(:topic)
|
263
|
+
message_size = event.payload.fetch(:message_size)
|
264
|
+
buffer_size = event.payload.fetch(:buffer_size)
|
265
|
+
|
266
|
+
tags = {
|
267
|
+
client: client,
|
268
|
+
topic: topic,
|
269
|
+
}
|
270
|
+
|
271
|
+
if event.payload.key?(:exception)
|
272
|
+
increment("producer.produce.errors", tags: tags)
|
273
|
+
end
|
274
|
+
|
275
|
+
# This gets us the write rate.
|
276
|
+
increment("producer.produce.messages", tags: tags.merge(topic: topic))
|
277
|
+
|
278
|
+
# Information about typical/average/95p message size.
|
279
|
+
histogram("producer.produce.message_size", message_size, tags: tags.merge(topic: topic))
|
280
|
+
|
281
|
+
# Aggregate message size.
|
282
|
+
count("producer.produce.message_size.sum", message_size, tags: tags.merge(topic: topic))
|
283
|
+
|
284
|
+
# This gets us the avg/max buffer size per producer.
|
285
|
+
histogram("producer.buffer.size", buffer_size, tags: tags)
|
286
|
+
end
|
287
|
+
|
288
|
+
def produce_sync(event)
|
289
|
+
client = event.payload.fetch(:client_id)
|
290
|
+
topic = event.payload.fetch(:topic)
|
291
|
+
message_size = event.payload.fetch(:message_size)
|
292
|
+
|
293
|
+
tags = {
|
294
|
+
client: client,
|
295
|
+
topic: topic,
|
296
|
+
}
|
297
|
+
|
298
|
+
if event.payload.key?(:exception)
|
299
|
+
increment("producer.produce.errors", tags: tags)
|
300
|
+
end
|
301
|
+
|
302
|
+
|
303
|
+
# This gets us the write rate.
|
304
|
+
increment("producer.produce.messages", tags: tags.merge(topic: topic))
|
305
|
+
|
306
|
+
# Information about typical/average/95p message size.
|
307
|
+
histogram("producer.produce.message_size", message_size, tags: tags.merge(topic: topic))
|
308
|
+
|
309
|
+
# Aggregate message size.
|
310
|
+
count("producer.produce.message_size.sum", message_size, tags: tags.merge(topic: topic))
|
311
|
+
end
|
312
|
+
|
233
313
|
attach_to "racecar"
|
234
314
|
end
|
235
315
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Racecar
|
4
|
+
class DeliveryCallback
|
5
|
+
attr_reader :instrumenter
|
6
|
+
|
7
|
+
def initialize(instrumenter:)
|
8
|
+
@instrumenter = instrumenter
|
9
|
+
end
|
10
|
+
|
11
|
+
def call(delivery_report)
|
12
|
+
if delivery_report.error.to_i.zero?
|
13
|
+
payload = {
|
14
|
+
offset: delivery_report.offset,
|
15
|
+
partition: delivery_report.partition
|
16
|
+
}
|
17
|
+
instrumenter.instrument("acknowledged_message", payload)
|
18
|
+
else
|
19
|
+
payload = {
|
20
|
+
partition: delivery_report.partition,
|
21
|
+
exception: delivery_report.error
|
22
|
+
}
|
23
|
+
instrumenter.instrument("produce_delivery_error", payload)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# `rd_kafka_offsets_store()` (et.al) returns an error for any
|
2
|
+
# partition that is not currently assigned (through `rd_kafka_*assign()`).
|
3
|
+
# This prevents a race condition where an application would store offsets
|
4
|
+
# after the assigned partitions had been revoked (which resets the stored
|
5
|
+
# offset), that could cause these old stored offsets to be committed later
|
6
|
+
# when the same partitions were assigned to this consumer again - effectively
|
7
|
+
# overwriting any committed offsets by any consumers that were assigned the
|
8
|
+
# same partitions previously. This would typically result in the offsets
|
9
|
+
# rewinding and messages to be reprocessed.
|
10
|
+
# As an extra effort to avoid this situation the stored offset is now
|
11
|
+
# also reset when partitions are assigned (through `rd_kafka_*assign()`).
|
12
|
+
module Racecar
|
13
|
+
class ErroneousStateError < StandardError
|
14
|
+
def initialize(rdkafka_error)
|
15
|
+
raise rdkafka_error unless rdkafka_error.is_a?(Rdkafka::RdkafkaError)
|
16
|
+
|
17
|
+
@rdkafka_error = rdkafka_error
|
18
|
+
end
|
19
|
+
|
20
|
+
attr_reader :rdkafka_error
|
21
|
+
|
22
|
+
def code
|
23
|
+
@rdkafka_error.code
|
24
|
+
end
|
25
|
+
|
26
|
+
def to_s
|
27
|
+
<<~EOM
|
28
|
+
Partition is no longer assigned to this consumer and the offset could not be stored for commit.
|
29
|
+
#{@rdkafka_error.to_s}
|
30
|
+
EOM
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
34
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'tempfile'
|
4
|
+
|
5
|
+
# Heroku Kafka addon provides 4 ENVs to connect to their Kafka Broker
|
6
|
+
# KAFKA_TRUSTED_CERT, KAFKA_CLIENT_CERT, KAFKA_CLIENT_CERT_KEY, KAFKA_URL
|
7
|
+
# This will work only if the Heroku Kafka add-on is aliased to "KAFKA"
|
8
|
+
|
9
|
+
$stderr.puts "=> Loading configuration from Heroku Kafka ENVs"
|
10
|
+
|
11
|
+
module Racecar
|
12
|
+
module Heroku
|
13
|
+
def self.load_configuration!
|
14
|
+
[
|
15
|
+
"KAFKA_URL",
|
16
|
+
"KAFKA_TRUSTED_CERT",
|
17
|
+
"KAFKA_CLIENT_CERT",
|
18
|
+
"KAFKA_CLIENT_CERT_KEY"
|
19
|
+
]. each do |env_name|
|
20
|
+
if ENV[env_name].nil?
|
21
|
+
$stderr.puts "Error: ENV #{env_name} is not set"
|
22
|
+
exit 1
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
Racecar.configure do |config|
|
27
|
+
ca_cert = ENV["KAFKA_TRUSTED_CERT"]
|
28
|
+
client_cert = ENV["KAFKA_CLIENT_CERT"]
|
29
|
+
client_cert_key = ENV["KAFKA_CLIENT_CERT_KEY"]
|
30
|
+
|
31
|
+
tmp_file_path = lambda do |data|
|
32
|
+
tempfile = Tempfile.new(['', '.pem'])
|
33
|
+
tempfile << data
|
34
|
+
tempfile.close
|
35
|
+
tempfile.path
|
36
|
+
end
|
37
|
+
|
38
|
+
config.security_protocol = :ssl
|
39
|
+
config.ssl_ca_location = tmp_file_path.call(ca_cert)
|
40
|
+
config.ssl_certificate_location = tmp_file_path.call(client_cert)
|
41
|
+
config.ssl_key_location = tmp_file_path.call(client_cert_key)
|
42
|
+
|
43
|
+
config.brokers = ENV["KAFKA_URL"].to_s.gsub('kafka+ssl://', '').split(',')
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
Racecar::Heroku.load_configuration!
|
data/lib/racecar/instrumenter.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Racecar
|
2
4
|
##
|
3
5
|
# Common API for instrumentation to standardize
|
@@ -7,14 +9,9 @@ module Racecar
|
|
7
9
|
NAMESPACE = "racecar"
|
8
10
|
attr_reader :backend
|
9
11
|
|
10
|
-
def initialize(default_payload
|
12
|
+
def initialize(backend:, default_payload: {})
|
13
|
+
@backend = backend
|
11
14
|
@default_payload = default_payload
|
12
|
-
|
13
|
-
@backend = if defined?(ActiveSupport::Notifications)
|
14
|
-
ActiveSupport::Notifications
|
15
|
-
else
|
16
|
-
NullInstrumenter
|
17
|
-
end
|
18
15
|
end
|
19
16
|
|
20
17
|
def instrument(event_name, payload = {}, &block)
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require "fileutils"
|
2
|
+
|
3
|
+
module Racecar
|
4
|
+
class LivenessProbe
|
5
|
+
def initialize(message_bus, file_path, max_interval)
|
6
|
+
@message_bus = message_bus
|
7
|
+
@file_path = file_path
|
8
|
+
@max_interval = max_interval
|
9
|
+
@subscribers = []
|
10
|
+
end
|
11
|
+
|
12
|
+
attr_reader :message_bus, :file_path, :max_interval, :subscribers
|
13
|
+
private :message_bus, :file_path, :max_interval, :subscribers
|
14
|
+
|
15
|
+
def check_liveness_within_interval!
|
16
|
+
unless liveness_event_within_interval?
|
17
|
+
$stderr.puts "Racecar healthcheck failed: No liveness within interval #{max_interval}s. Last liveness at #{last_liveness_event_at}, #{elapsed_since_liveness_event} seconds ago."
|
18
|
+
Process.exit(1)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def liveness_event_within_interval?
|
23
|
+
elapsed_since_liveness_event < max_interval
|
24
|
+
rescue Errno::ENOENT
|
25
|
+
$stderr.puts "Racecar healthcheck failed: Liveness file not found `#{file_path}`"
|
26
|
+
Process.exit(1)
|
27
|
+
end
|
28
|
+
|
29
|
+
def install
|
30
|
+
unless file_path && file_writeable?
|
31
|
+
raise(
|
32
|
+
"Liveness probe configuration error: `liveness_probe_file_path` must be set to a writable file path.\n" \
|
33
|
+
" Set `RACECAR_LIVENESS_PROBE_FILE_PATH` and `RACECAR_LIVENESS_MAX_INTERVAL` environment variables."
|
34
|
+
)
|
35
|
+
end
|
36
|
+
|
37
|
+
subscribers << message_bus.subscribe("start_main_loop.racecar") do
|
38
|
+
touch_liveness_file
|
39
|
+
end
|
40
|
+
|
41
|
+
subscribers = message_bus.subscribe("shut_down.racecar") do
|
42
|
+
delete_liveness_file
|
43
|
+
end
|
44
|
+
|
45
|
+
nil
|
46
|
+
end
|
47
|
+
|
48
|
+
def uninstall
|
49
|
+
subscribers.each { |s| message_bus.unsubscribe(s) }
|
50
|
+
end
|
51
|
+
|
52
|
+
private
|
53
|
+
|
54
|
+
def elapsed_since_liveness_event
|
55
|
+
Time.now - last_liveness_event_at
|
56
|
+
end
|
57
|
+
|
58
|
+
def last_liveness_event_at
|
59
|
+
File.mtime(file_path)
|
60
|
+
end
|
61
|
+
|
62
|
+
def touch_liveness_file
|
63
|
+
FileUtils.touch(file_path)
|
64
|
+
end
|
65
|
+
|
66
|
+
def delete_liveness_file
|
67
|
+
FileUtils.rm_rf(file_path)
|
68
|
+
end
|
69
|
+
|
70
|
+
def file_writeable?
|
71
|
+
File.write(file_path, "")
|
72
|
+
File.unlink(file_path)
|
73
|
+
true
|
74
|
+
rescue
|
75
|
+
false
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
data/lib/racecar/message.rb
CHANGED
@@ -1,11 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "forwardable"
|
2
4
|
|
3
5
|
module Racecar
|
4
6
|
class Message
|
5
7
|
extend Forwardable
|
6
8
|
|
7
|
-
|
9
|
+
attr_reader :retries_count
|
10
|
+
|
11
|
+
def initialize(rdkafka_message, retries_count: nil)
|
8
12
|
@rdkafka_message = rdkafka_message
|
13
|
+
@retries_count = retries_count
|
9
14
|
end
|
10
15
|
|
11
16
|
def_delegators :@rdkafka_message, :topic, :partition, :offset, :key, :headers
|