racecar 2.0.0 → 2.10.0.beta2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +17 -0
- data/.github/workflows/ci.yml +46 -0
- data/.github/workflows/publish.yml +12 -0
- data/.gitignore +1 -2
- data/CHANGELOG.md +83 -1
- data/Dockerfile +9 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +72 -0
- data/README.md +303 -82
- data/Rakefile +5 -0
- data/docker-compose.yml +65 -0
- data/examples/batch_consumer.rb +4 -2
- data/examples/cat_consumer.rb +2 -0
- data/examples/producing_consumer.rb +2 -0
- data/exe/racecar +37 -14
- data/extra/datadog-dashboard.json +1 -0
- data/lib/ensure_hash_compact.rb +2 -0
- data/lib/generators/racecar/consumer_generator.rb +2 -0
- data/lib/generators/racecar/install_generator.rb +2 -0
- data/lib/racecar/cli.rb +26 -21
- data/lib/racecar/config.rb +80 -4
- data/lib/racecar/consumer.rb +51 -6
- data/lib/racecar/consumer_set.rb +113 -44
- data/lib/racecar/ctl.rb +31 -3
- data/lib/racecar/daemon.rb +4 -2
- data/lib/racecar/datadog.rb +83 -3
- data/lib/racecar/delivery_callback.rb +27 -0
- data/lib/racecar/erroneous_state_error.rb +34 -0
- data/lib/racecar/heroku.rb +49 -0
- data/lib/racecar/instrumenter.rb +4 -7
- data/lib/racecar/liveness_probe.rb +78 -0
- data/lib/racecar/message.rb +6 -1
- data/lib/racecar/message_delivery_error.rb +112 -0
- data/lib/racecar/null_instrumenter.rb +2 -0
- data/lib/racecar/parallel_runner.rb +110 -0
- data/lib/racecar/pause.rb +8 -4
- data/lib/racecar/producer.rb +139 -0
- data/lib/racecar/rails_config_file_loader.rb +7 -1
- data/lib/racecar/rebalance_listener.rb +58 -0
- data/lib/racecar/runner.rb +79 -37
- data/lib/racecar/version.rb +3 -1
- data/lib/racecar.rb +36 -8
- data/racecar.gemspec +7 -4
- metadata +47 -25
- data/.github/workflows/rspec.yml +0 -24
data/lib/racecar/consumer_set.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Racecar
|
2
4
|
class ConsumerSet
|
3
5
|
MAX_POLL_TRIES = 10
|
@@ -10,53 +12,50 @@ module Racecar
|
|
10
12
|
@consumers = []
|
11
13
|
@consumer_id_iterator = (0...@config.subscriptions.size).cycle
|
12
14
|
|
15
|
+
@previous_retries = 0
|
16
|
+
|
13
17
|
@last_poll_read_nil_message = false
|
18
|
+
@paused_tpls = Hash.new { |h, k| h[k] = {} }
|
14
19
|
end
|
15
20
|
|
16
|
-
def poll(
|
17
|
-
|
18
|
-
started_at ||= Time.now
|
19
|
-
try ||= 0
|
20
|
-
remain ||= timeout_ms
|
21
|
-
|
22
|
-
msg = remain <= 0 ? nil : current.poll(remain)
|
23
|
-
rescue Rdkafka::RdkafkaError => e
|
24
|
-
wait_before_retry_ms = 100 * (2**try) # 100ms, 200ms, 400ms, …
|
25
|
-
try += 1
|
26
|
-
raise if try >= MAX_POLL_TRIES || remain <= wait_before_retry_ms
|
27
|
-
|
28
|
-
@logger.error "(try #{try}): Error for topic subscription #{current_subscription}: #{e}"
|
29
|
-
|
30
|
-
case e.code
|
31
|
-
when :max_poll_exceeded, :transport # -147, -195
|
32
|
-
reset_current_consumer
|
33
|
-
end
|
34
|
-
|
35
|
-
remain = remaining_time_ms(timeout_ms, started_at)
|
36
|
-
raise if remain <= wait_before_retry_ms
|
37
|
-
|
38
|
-
sleep wait_before_retry_ms/1000.0
|
39
|
-
retry
|
40
|
-
ensure
|
41
|
-
@last_poll_read_nil_message = true if msg.nil?
|
21
|
+
# Polls for a single message by delegating to batch_poll with a batch size of one.
#
# @param max_wait_time_ms [Numeric] time budget handed to batch_poll
# @return the first element of the batch, or nil when the batch is empty
def poll(max_wait_time_ms = @config.max_wait_time_ms)
  single_message_batch = batch_poll(max_wait_time_ms, 1)
  single_message_batch.first
end
|
43
24
|
|
44
|
-
#
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
25
|
+
# batch_poll collects messages until any of the following occurs:
|
26
|
+
# - max_wait_time_ms time has passed
|
27
|
+
# - max_messages have been collected
|
28
|
+
# - a nil message was polled (end of topic, Kafka stalled, etc.)
|
29
|
+
#
|
30
|
+
# The messages are from a single topic, but potentially from more than one partition.
|
31
|
+
#
|
32
|
+
# Any errors during polling are retried in an exponential backoff fashion. If an error
|
33
|
+
# occurs, but there is no time left for a backoff and retry, it will return the
|
34
|
+
# already collected messages and only retry on the next call.
|
35
|
+
# Collects messages from the current consumer until the time budget is used up,
# max_messages have been gathered, or a poll yields nil.
#
# @param max_wait_time_ms [Numeric] overall time budget for this call
# @param max_messages [Integer] upper bound on the number of collected messages
# @return [Array] the collected messages (possibly empty)
def batch_poll(max_wait_time_ms = @config.max_wait_time_ms, max_messages = @config.fetch_messages)
  started_at = Time.now
  time_left_ms = max_wait_time_ms
  maybe_select_next_consumer
  collected = []

  until time_left_ms <= 0 || collected.size >= max_messages
    # Re-evaluate the budget on every round so time spent polling is accounted for.
    time_left_ms = remaining_time_ms(max_wait_time_ms, started_at)
    polled = poll_with_retries(time_left_ms)
    break if polled.nil?

    collected << polled
  end

  collected
end
|
57
50
|
|
58
51
|
# Stores the offset of the given message on the current consumer.
#
# An Rdkafka error with code :state is logged as a warning and swallowed (nil is
# returned); every other Rdkafka error is re-raised.
def store_offset(message)
  current.store_offset(message)
rescue Rdkafka::RdkafkaError => e
  raise e unless e.code == :state # -172

  @logger.warn "Attempted to store_offset, but we're not subscribed to it: #{ErroneousStateError.new(e)}"
  nil
end
|
61
60
|
|
62
61
|
def commit
|
@@ -67,11 +66,17 @@ module Racecar
|
|
67
66
|
|
68
67
|
# Closes every subscribed consumer and forgets all paused-partition bookkeeping.
def close
  each_subscribed { |consumer| consumer.close }
  @paused_tpls.clear
end
|
71
71
|
|
72
72
|
def current
|
73
73
|
@consumers[@consumer_id_iterator.peek] ||= begin
|
74
|
-
|
74
|
+
consumer_config = Rdkafka::Config.new(rdkafka_config(current_subscription))
|
75
|
+
listener = RebalanceListener.new(@config.consumer_class, @instrumenter)
|
76
|
+
consumer_config.consumer_rebalance_listener = listener
|
77
|
+
consumer = consumer_config.consumer
|
78
|
+
listener.rdkafka_consumer = consumer
|
79
|
+
|
75
80
|
@instrumenter.instrument('join_group') do
|
76
81
|
consumer.subscribe current_subscription.topic
|
77
82
|
end
|
@@ -97,16 +102,25 @@ module Racecar
|
|
97
102
|
consumer.pause(filtered_tpl)
|
98
103
|
fake_msg = OpenStruct.new(topic: topic, partition: partition, offset: offset)
|
99
104
|
consumer.seek(fake_msg)
|
105
|
+
|
106
|
+
@paused_tpls[topic][partition] = [consumer, filtered_tpl]
|
100
107
|
end
|
101
108
|
|
102
109
|
# Resumes consumption of a previously paused topic/partition.
#
# The consumer is looked up via find_consumer_by first; when that finds nothing,
# the entry recorded in @paused_tpls at pause time is used instead. If neither
# lookup yields a consumer, an info line is logged and nil is returned.
#
# @param topic [String]
# @param partition [Integer]
def resume(topic, partition)
  consumer, filtered_tpl = find_consumer_by(topic, partition)

  unless consumer
    # @paused_tpls has a default block that creates an empty hash on first access.
    # Guard with key? so a lookup miss does not leave an empty entry behind.
    paused_entry = @paused_tpls.key?(topic) ? @paused_tpls[topic][partition] : nil
    consumer, filtered_tpl = paused_entry if paused_entry
  end

  unless consumer
    @logger.info "Attempted to resume #{topic}/#{partition}, but we're not subscribed to it"
    return
  end

  consumer.resume(filtered_tpl)
  @paused_tpls[topic].delete(partition)
  @paused_tpls.delete(topic) if @paused_tpls[topic].empty?
end
|
111
125
|
|
112
126
|
alias :each :each_subscribed
|
@@ -123,6 +137,55 @@ module Racecar
|
|
123
137
|
|
124
138
|
private
|
125
139
|
|
140
|
+
# polls a single message from the current consumer, retrying errors with exponential
|
141
|
+
# backoff. The sleep time is capped by max_wait_time_ms. If there's enough time budget
|
142
|
+
# left, it will retry before returning. If there isn't, the retry will only occur on
|
143
|
+
# the next call. It tries up to MAX_POLL_TRIES before passing on the exception.
|
144
|
+
# Polls one message from the current consumer, retrying Rdkafka errors with an
# exponential backoff of 50 * 2**attempt milliseconds.
#
# - When the backoff is at least max_wait_time_ms, the sleep is capped to
#   max_wait_time_ms - 1 and the poll gets a 1ms budget.
# - When the backoff does not fit into the remaining budget, the attempt count is
#   remembered in @previous_retries and nil is returned so the next call continues.
# - After MAX_POLL_TRIES failed attempts the last error is re-raised.
#
# `attempt` and `began_at` use ||= so their values survive `retry`.
def poll_with_retries(max_wait_time_ms)
  attempt ||= @previous_retries
  @previous_retries = 0
  began_at ||= Time.now
  budget_ms = remaining_time_ms(max_wait_time_ms, began_at)

  backoff_ms = attempt == 0 ? 0 : 50 * (2**attempt) # 0ms, 100ms, 200ms, 400ms, …
  if backoff_ms >= max_wait_time_ms && budget_ms > 1
    @logger.debug "Capping #{backoff_ms}ms to #{max_wait_time_ms - 1}ms."
    sleep((max_wait_time_ms - 1) / 1000.0)
    budget_ms = 1
  elsif attempt == 0 && budget_ms == 0
    @logger.debug "No time remains for polling messages. Will try on next call."
    return nil
  elsif backoff_ms >= budget_ms
    @logger.warn "Only #{budget_ms}ms left, but want to wait for #{backoff_ms}ms before poll. Will retry on next call."
    @previous_retries = attempt
    return nil
  elsif backoff_ms > 0
    sleep(backoff_ms / 1000.0)
    budget_ms -= backoff_ms
  end

  poll_current_consumer(budget_ms)
rescue Rdkafka::RdkafkaError => e
  attempt += 1
  @instrumenter.instrument("poll_retry", try: attempt, rdkafka_time_limit: budget_ms, exception: e)
  @logger.error "(try #{attempt}/#{MAX_POLL_TRIES}): Error for topic subscription #{current_subscription}: #{e}"
  raise if attempt >= MAX_POLL_TRIES

  retry
end
|
175
|
+
|
176
|
+
# polls a message for the current consumer, handling any API edge cases.
|
177
|
+
# Polls the current consumer once.
#
# For the Rdkafka error codes :max_poll_exceeded, :transport and :not_coordinator
# the current consumer is reset before the error is re-raised; all other Rdkafka
# errors are re-raised untouched. @last_poll_read_nil_message is always updated
# to reflect whether a message was obtained.
def poll_current_consumer(max_wait_time_ms)
  msg = current.poll(max_wait_time_ms)
rescue Rdkafka::RdkafkaError => e
  # -147, -195, 16
  reset_current_consumer if %i[max_poll_exceeded transport not_coordinator].include?(e.code)
  raise
ensure
  @last_poll_read_nil_message = msg.nil?
end
|
188
|
+
|
126
189
|
def find_consumer_by(topic, partition)
|
127
190
|
each do |consumer|
|
128
191
|
tpl = consumer.assignment.to_h
|
@@ -140,7 +203,12 @@ module Racecar
|
|
140
203
|
end
|
141
204
|
|
142
205
|
# Closes the consumer in the currently selected slot (if one exists) and empties
# the slot.
def reset_current_consumer
  slot = @consumer_id_iterator.peek
  @logger.info "Resetting consumer with id: #{slot}"

  @consumers[slot]&.close
  @consumers[slot] = nil
end
|
145
213
|
|
146
214
|
def maybe_select_next_consumer
|
@@ -160,14 +228,14 @@ module Racecar
|
|
160
228
|
@logger.debug "Nothing to commit."
|
161
229
|
end
|
162
230
|
|
163
|
-
def collect_messages_for_batch?
|
164
|
-
@messages.size < @config.fetch_messages &&
|
165
|
-
(Time.now - @batch_started_at) < @config.max_wait_time
|
166
|
-
end
|
167
|
-
|
168
231
|
def rdkafka_config(subscription)
|
169
232
|
# https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
|
170
233
|
config = {
|
234
|
+
# Manually store offset after messages have been processed successfully
|
235
|
+
# to avoid marking failed messages as committed. The call just updates
|
236
|
+
# a value within librdkafka and is asynchronously written to proper
|
237
|
+
# storage through auto commits.
|
238
|
+
"enable.auto.offset.store" => false,
|
171
239
|
"auto.commit.interval.ms" => @config.offset_commit_interval * 1000,
|
172
240
|
"auto.offset.reset" => subscription.start_from_beginning ? "earliest" : "largest",
|
173
241
|
"bootstrap.servers" => @config.brokers.join(","),
|
@@ -183,7 +251,8 @@ module Racecar
|
|
183
251
|
"queued.min.messages" => @config.min_message_queue_size,
|
184
252
|
"session.timeout.ms" => @config.session_timeout * 1000,
|
185
253
|
"socket.timeout.ms" => @config.socket_timeout * 1000,
|
186
|
-
"statistics.interval.ms" =>
|
254
|
+
"statistics.interval.ms" => @config.statistics_interval_ms,
|
255
|
+
"partition.assignment.strategy" => @config.partition_assignment_strategy,
|
187
256
|
}
|
188
257
|
config.merge! @config.rdkafka_consumer
|
189
258
|
config.merge! subscription.additional_config
|
data/lib/racecar/ctl.rb
CHANGED
@@ -1,6 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "optparse"
|
2
4
|
require "racecar/rails_config_file_loader"
|
3
5
|
require "racecar/daemon"
|
6
|
+
require "racecar/message_delivery_error"
|
4
7
|
|
5
8
|
module Racecar
|
6
9
|
class Ctl
|
@@ -29,6 +32,21 @@ module Racecar
|
|
29
32
|
@command = command
|
30
33
|
end
|
31
34
|
|
35
|
+
# Runs the liveness check: parses the CLI options, loads config/racecar.yml (only
# when RAILS_ENV is set and the file exists) and config/racecar.rb (when it exists),
# then asks the configured liveness probe to verify liveness.
def liveness_probe(args)
  require "racecar/liveness_probe"
  parse_options!(args)

  rails_env = ENV["RAILS_ENV"]
  Racecar.config.load_file("config/racecar.yml", rails_env) if rails_env && File.exist?("config/racecar.yml")

  require "./config/racecar" if File.exist?("config/racecar.rb")

  Racecar.config.liveness_probe.check_liveness_within_interval!
end
|
49
|
+
|
32
50
|
def status(args)
|
33
51
|
parse_options!(args)
|
34
52
|
|
@@ -94,11 +112,17 @@ module Racecar
|
|
94
112
|
Racecar.config.validate!
|
95
113
|
|
96
114
|
producer = Rdkafka::Config.new({
|
97
|
-
"bootstrap.servers":
|
98
|
-
"client.id":
|
115
|
+
"bootstrap.servers": Racecar.config.brokers.join(","),
|
116
|
+
"client.id": Racecar.config.client_id,
|
117
|
+
"message.timeout.ms": Racecar.config.message_timeout * 1000,
|
99
118
|
}.merge(Racecar.config.rdkafka_producer)).producer
|
100
119
|
|
101
|
-
producer.produce(payload: message.value, key: message.key, topic: message.topic)
|
120
|
+
handle = producer.produce(payload: message.value, key: message.key, topic: message.topic)
|
121
|
+
begin
|
122
|
+
handle.wait(max_wait_timeout: Racecar.config.message_timeout)
|
123
|
+
rescue Rdkafka::RdkafkaError => e
|
124
|
+
raise MessageDeliveryError.new(e, handle)
|
125
|
+
end
|
102
126
|
|
103
127
|
$stderr.puts "=> Delivered message to Kafka cluster"
|
104
128
|
end
|
@@ -116,5 +140,9 @@ module Racecar
|
|
116
140
|
|
117
141
|
parser.parse!(args)
|
118
142
|
end
|
143
|
+
|
144
|
+
# Shorthand for the global Racecar configuration object.
def config
  Racecar.config
end
|
119
147
|
end
|
120
148
|
end
|
data/lib/racecar/daemon.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Racecar
|
2
4
|
class Daemon
|
3
5
|
attr_reader :pidfile
|
@@ -52,7 +54,7 @@ module Racecar
|
|
52
54
|
end
|
53
55
|
|
54
56
|
def pid
|
55
|
-
if File.
|
57
|
+
if File.exist?(pidfile)
|
56
58
|
File.read(pidfile).to_i
|
57
59
|
else
|
58
60
|
nil
|
@@ -87,7 +89,7 @@ module Racecar
|
|
87
89
|
end
|
88
90
|
|
89
91
|
at_exit do
|
90
|
-
File.delete(pidfile) if File.
|
92
|
+
File.delete(pidfile) if File.exist?(pidfile)
|
91
93
|
end
|
92
94
|
rescue Errno::EEXIST
|
93
95
|
check_pid
|
data/lib/racecar/datadog.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
begin
|
2
4
|
require "datadog/statsd"
|
3
5
|
rescue LoadError
|
@@ -61,10 +63,14 @@ module Racecar
|
|
61
63
|
clear
|
62
64
|
end
|
63
65
|
|
66
|
+
# Closes the memoized statsd client, if there is one.
def close
  @statsd.close unless @statsd.nil?
end
|
69
|
+
|
64
70
|
private
|
65
71
|
|
66
72
|
# Closes the current statsd client (via close) and drops the reference to it.
def clear
  close
  @statsd = nil
end
|
70
76
|
end
|
@@ -73,8 +79,8 @@ module Racecar
|
|
73
79
|
private
|
74
80
|
|
75
81
|
# Defines one helper per metric type. Each helper forwards its positional and
# keyword arguments to `emit`, with the metric type as the first argument.
%w[increment histogram count timing gauge].each do |metric_type|
  define_method(metric_type) { |*args, **kwargs| emit(metric_type, *args, **kwargs) }
end
|
80
86
|
|
@@ -155,6 +161,15 @@ module Racecar
|
|
155
161
|
end
|
156
162
|
end
|
157
163
|
|
164
|
+
# Counts a poll retry, with the error code of the payload's :exception embedded in
# the metric name (non-word characters stripped).
def poll_retry(event)
  payload = event.payload
  tags = { client: payload.fetch(:client_id), group_id: payload.fetch(:group_id) }

  sanitized_code = payload.fetch(:exception).code.to_s.gsub(/\W/, '')
  increment("consumer.poll.rdkafka_error.#{sanitized_code}", tags: tags)
end
|
172
|
+
|
158
173
|
def main_loop(event)
|
159
174
|
tags = {
|
160
175
|
client: event.payload.fetch(:client_id),
|
@@ -196,6 +211,10 @@ module Racecar
|
|
196
211
|
topic: topic,
|
197
212
|
}
|
198
213
|
|
214
|
+
if event.payload.key?(:exception)
|
215
|
+
increment("producer.produce.errors", tags: tags)
|
216
|
+
end
|
217
|
+
|
199
218
|
# This gets us the write rate.
|
200
219
|
increment("producer.produce.messages", tags: tags.merge(topic: topic))
|
201
220
|
|
@@ -230,6 +249,67 @@ module Racecar
|
|
230
249
|
increment("producer.ack.messages", tags: tags)
|
231
250
|
end
|
232
251
|
|
252
|
+
# Counts a failed delivery, tagged with the client id from the event payload.
def produce_delivery_error(event)
  client_tags = { client: event.payload.fetch(:client_id) }
  increment("producer.produce.delivery.errors", tags: client_tags)
end
|
259
|
+
|
260
|
+
# Emits metrics for an asynchronous produce event: an error counter (only when the
# payload has an :exception key), a message counter, message-size histogram and
# sum, and a buffer-size histogram.
def produce_async(event)
  payload = event.payload
  client = payload.fetch(:client_id)
  topic = payload.fetch(:topic)
  message_size = payload.fetch(:message_size)
  buffer_size = payload.fetch(:buffer_size)

  tags = { client: client, topic: topic }

  increment("producer.produce.errors", tags: tags) if payload.key?(:exception)

  # Write rate.
  increment("producer.produce.messages", tags: tags.merge(topic: topic))

  # Typical/average/95p message size.
  histogram("producer.produce.message_size", message_size, tags: tags.merge(topic: topic))

  # Aggregate message size.
  count("producer.produce.message_size.sum", message_size, tags: tags.merge(topic: topic))

  # Avg/max buffer size per producer.
  histogram("producer.buffer.size", buffer_size, tags: tags)
end
|
287
|
+
|
288
|
+
# Emits metrics for a synchronous produce event: an error counter (only when the
# payload has an :exception key), a message counter, and message-size histogram
# and sum.
def produce_sync(event)
  payload = event.payload
  client = payload.fetch(:client_id)
  topic = payload.fetch(:topic)
  message_size = payload.fetch(:message_size)

  tags = { client: client, topic: topic }

  increment("producer.produce.errors", tags: tags) if payload.key?(:exception)

  # Write rate.
  increment("producer.produce.messages", tags: tags.merge(topic: topic))

  # Typical/average/95p message size.
  histogram("producer.produce.message_size", message_size, tags: tags.merge(topic: topic))

  # Aggregate message size.
  count("producer.produce.message_size.sum", message_size, tags: tags.merge(topic: topic))
end
|
312
|
+
|
233
313
|
attach_to "racecar"
|
234
314
|
end
|
235
315
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Racecar
  # Callable that turns a delivery report into an instrumentation event:
  # "acknowledged_message" when the report's error converts to zero,
  # "produce_delivery_error" otherwise.
  class DeliveryCallback
    attr_reader :instrumenter

    # @param instrumenter [#instrument] receives (event_name, payload_hash)
    def initialize(instrumenter:)
      @instrumenter = instrumenter
    end

    # @param delivery_report [#error, #offset, #partition]
    # @return whatever the instrumenter returns
    def call(delivery_report)
      unless delivery_report.error.to_i.zero?
        return instrumenter.instrument("produce_delivery_error", failure_payload(delivery_report))
      end

      instrumenter.instrument("acknowledged_message", success_payload(delivery_report))
    end

    private

    # Payload for a successfully delivered message.
    def success_payload(report)
      { offset: report.offset, partition: report.partition }
    end

    # Payload for a failed delivery.
    def failure_payload(report)
      { partition: report.partition, exception: report.error }
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# `rd_kafka_offsets_store()` (et.al) returns an error for any
|
2
|
+
# partition that is not currently assigned (through `rd_kafka_*assign()`).
|
3
|
+
# This prevents a race condition where an application would store offsets
|
4
|
+
# after the assigned partitions had been revoked (which resets the stored
|
5
|
+
# offset), that could cause these old stored offsets to be committed later
|
6
|
+
# when the same partitions were assigned to this consumer again - effectively
|
7
|
+
# overwriting any committed offsets by any consumers that were assigned the
|
8
|
+
# same partitions previously. This would typically result in the offsets
|
9
|
+
# rewinding and messages to be reprocessed.
|
10
|
+
# As an extra effort to avoid this situation the stored offset is now
|
11
|
+
# also reset when partitions are assigned (through `rd_kafka_*assign()`).
|
12
|
+
module Racecar
  # Wraps an Rdkafka::RdkafkaError and renders it with an explanatory first line
  # about the partition no longer being assigned to this consumer.
  class ErroneousStateError < StandardError
    attr_reader :rdkafka_error

    # @param rdkafka_error [Rdkafka::RdkafkaError] the error to wrap; anything else
    #   is raised as-is instead of being wrapped
    def initialize(rdkafka_error)
      raise rdkafka_error unless rdkafka_error.is_a?(Rdkafka::RdkafkaError)

      @rdkafka_error = rdkafka_error
    end

    # @return the wrapped error's code
    def code
      rdkafka_error.code
    end

    # @return [String] two newline-terminated lines: the explanation and the wrapped error
    def to_s
      "Partition is no longer assigned to this consumer and the offset could not be stored for commit.\n" \
        "#{rdkafka_error}\n"
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'tempfile'
|
4
|
+
|
5
|
+
# Heroku Kafka addon provides 4 ENVs to connect to their Kafka Broker
|
6
|
+
# KAFKA_TRUSTED_CERT, KAFKA_CLIENT_CERT, KAFKA_CLIENT_CERT_KEY, KAFKA_URL
|
7
|
+
# This will work only if the Heroku Kafka add-on is aliased to "KAFKA"
|
8
|
+
|
9
|
+
$stderr.puts "=> Loading configuration from Heroku Kafka ENVs"
|
10
|
+
|
11
|
+
module Racecar
  # Configures Racecar from the Heroku Kafka environment variables KAFKA_URL,
  # KAFKA_TRUSTED_CERT, KAFKA_CLIENT_CERT and KAFKA_CLIENT_CERT_KEY.
  module Heroku
    # Validates that all four variables are set (printing an error and exiting with
    # status 1 otherwise), writes the three PEM values to temporary files, and
    # points the SSL settings and broker list at them.
    def self.load_configuration!
      %w[KAFKA_URL KAFKA_TRUSTED_CERT KAFKA_CLIENT_CERT KAFKA_CLIENT_CERT_KEY].each do |env_name|
        next unless ENV[env_name].nil?

        $stderr.puts "Error: ENV #{env_name} is not set"
        exit 1
      end

      # Tempfile removes its file when the object is garbage collected. Only the
      # paths are handed to the configuration, so keep the objects referenced from
      # the module; otherwise the PEM files could vanish before they are read.
      retained_tempfiles = (@tempfiles ||= [])

      Racecar.configure do |config|
        ca_cert = ENV["KAFKA_TRUSTED_CERT"]
        client_cert = ENV["KAFKA_CLIENT_CERT"]
        client_cert_key = ENV["KAFKA_CLIENT_CERT_KEY"]

        # Writes data to a fresh *.pem tempfile and returns its path.
        tmp_file_path = lambda do |data|
          tempfile = Tempfile.new(['', '.pem'])
          tempfile << data
          tempfile.close
          retained_tempfiles << tempfile
          tempfile.path
        end

        config.security_protocol = :ssl
        config.ssl_ca_location = tmp_file_path.call(ca_cert)
        config.ssl_certificate_location = tmp_file_path.call(client_cert)
        config.ssl_key_location = tmp_file_path.call(client_cert_key)

        config.brokers = ENV["KAFKA_URL"].to_s.gsub('kafka+ssl://', '').split(',')
      end
    end
  end
end
|
48
|
+
|
49
|
+
Racecar::Heroku.load_configuration!
|
data/lib/racecar/instrumenter.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Racecar
|
2
4
|
##
|
3
5
|
# Common API for instrumentation to standardize
|
@@ -7,14 +9,9 @@ module Racecar
|
|
7
9
|
NAMESPACE = "racecar"
|
8
10
|
attr_reader :backend
|
9
11
|
|
10
|
-
def initialize(default_payload
|
12
|
+
# @param backend the instrumentation backend, stored as given
# @param default_payload [Hash] stored as given; defaults to an empty hash
def initialize(backend:, default_payload: {})
  @default_payload = default_payload
  @backend = backend
end
|
19
16
|
|
20
17
|
def instrument(event_name, payload = {}, &block)
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require "fileutils"
|
2
|
+
|
3
|
+
module Racecar
  # File-based liveness probe. While installed, it touches a file on every
  # "start_main_loop.racecar" event and deletes it on "shut_down.racecar"; the check
  # side compares the file's mtime against a maximum interval.
  class LivenessProbe
    # @param message_bus [#subscribe, #unsubscribe] event bus to listen on
    # @param file_path [String] path of the liveness file
    # @param max_interval [Numeric] maximum allowed age of the liveness file, in seconds
    def initialize(message_bus, file_path, max_interval)
      @message_bus = message_bus
      @file_path = file_path
      @max_interval = max_interval
      @subscribers = []
    end

    attr_reader :message_bus, :file_path, :max_interval, :subscribers
    private :message_bus, :file_path, :max_interval, :subscribers

    # Prints a diagnostic to stderr and exits the process with status 1 unless a
    # liveness event happened within max_interval.
    def check_liveness_within_interval!
      unless liveness_event_within_interval?
        $stderr.puts "Racecar healthcheck failed: No liveness within interval #{max_interval}s. Last liveness at #{last_liveness_event_at}, #{elapsed_since_liveness_event} seconds ago."
        Process.exit(1)
      end
    end

    # @return [Boolean] whether the liveness file was modified less than max_interval
    #   seconds ago. Exits the process with status 1 when the file does not exist.
    def liveness_event_within_interval?
      elapsed_since_liveness_event < max_interval
    rescue Errno::ENOENT
      $stderr.puts "Racecar healthcheck failed: Liveness file not found `#{file_path}`"
      Process.exit(1)
    end

    # Subscribes to the main-loop and shut-down events.
    #
    # @raise [RuntimeError] when file_path is unset or not writable
    # @return [nil]
    def install
      unless file_path && file_writeable?
        raise(
          "Liveness probe configuration error: `liveness_probe_file_path` must be set to a writable file path.\n" \
          " Set `RACECAR_LIVENESS_PROBE_FILE_PATH` and `RACECAR_LIVENESS_MAX_INTERVAL` environment variables."
        )
      end

      subscribers << message_bus.subscribe("start_main_loop.racecar") { touch_liveness_file }

      # Must append here: `subscribers = ...` would create a local variable, and
      # uninstall would then never unsubscribe this handler.
      subscribers << message_bus.subscribe("shut_down.racecar") { delete_liveness_file }

      nil
    end

    # Unsubscribes every handler registered by install.
    def uninstall
      subscribers.each { |s| message_bus.unsubscribe(s) }
    end

    private

    # Seconds since the liveness file was last modified.
    def elapsed_since_liveness_event
      Time.now - last_liveness_event_at
    end

    # Modification time of the liveness file; raises Errno::ENOENT when it is missing.
    def last_liveness_event_at
      File.mtime(file_path)
    end

    def touch_liveness_file
      FileUtils.touch(file_path)
    end

    def delete_liveness_file
      FileUtils.rm_rf(file_path)
    end

    # Probes writability by creating and then removing the file.
    def file_writeable?
      File.write(file_path, "")
      File.unlink(file_path)
      true
    rescue
      false
    end
  end
end
|
data/lib/racecar/message.rb
CHANGED
@@ -1,11 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "forwardable"
|
2
4
|
|
3
5
|
module Racecar
|
4
6
|
class Message
|
5
7
|
extend Forwardable
|
6
8
|
|
7
|
-
|
9
|
+
attr_reader :retries_count
|
10
|
+
|
11
|
+
# @param rdkafka_message the underlying message, stored as given
# @param retries_count the retry count exposed via the retries_count reader; nil by default
def initialize(rdkafka_message, retries_count: nil)
  @retries_count = retries_count
  @rdkafka_message = rdkafka_message
end
|
10
15
|
|
11
16
|
def_delegators :@rdkafka_message, :topic, :partition, :offset, :key, :headers
|