racecar 0.5.0.beta2 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.circleci/config.yml +56 -0
- data/.github/workflows/ci.yml +61 -0
- data/.gitignore +0 -1
- data/CHANGELOG.md +48 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +69 -0
- data/README.md +56 -59
- data/Rakefile +2 -0
- data/docker-compose.yml +32 -0
- data/examples/batch_consumer.rb +2 -0
- data/examples/cat_consumer.rb +2 -0
- data/examples/producing_consumer.rb +2 -0
- data/exe/racecar +36 -13
- data/lib/ensure_hash_compact.rb +12 -0
- data/lib/generators/racecar/consumer_generator.rb +2 -0
- data/lib/generators/racecar/install_generator.rb +2 -0
- data/lib/racecar.rb +20 -14
- data/lib/racecar/cli.rb +25 -22
- data/lib/racecar/config.rb +109 -45
- data/lib/racecar/consumer.rb +52 -11
- data/lib/racecar/consumer_set.rb +239 -0
- data/lib/racecar/ctl.rb +8 -8
- data/lib/racecar/daemon.rb +2 -0
- data/lib/racecar/datadog.rb +247 -0
- data/lib/racecar/instrumenter.rb +28 -0
- data/lib/racecar/message.rb +30 -0
- data/lib/racecar/null_instrumenter.rb +10 -0
- data/lib/racecar/pause.rb +59 -0
- data/lib/racecar/rails_config_file_loader.rb +2 -0
- data/lib/racecar/runner.rb +222 -113
- data/lib/racecar/version.rb +3 -1
- data/racecar.gemspec +7 -3
- metadata +91 -13
# frozen_string_literal: true

module Racecar
  ##
  # Common API for instrumentation. Prefixes every event name with the
  # "racecar" namespace and merges a default payload into each event, so
  # subscribers always receive consistent keys.
  #
  # Delegates to ActiveSupport::Notifications when it is available,
  # otherwise falls back to the no-op NullInstrumenter.
  class Instrumenter
    NAMESPACE = "racecar"

    # The underlying notification backend (ActiveSupport::Notifications
    # or NullInstrumenter).
    attr_reader :backend

    # @param default_payload [Hash] keys merged into every event payload
    def initialize(default_payload = {})
      @default_payload = default_payload

      @backend = if defined?(ActiveSupport::Notifications)
        # ActiveSupport needs `concurrent-ruby` but doesn't `require` it.
        require 'concurrent/utility/monotonic_time'
        ActiveSupport::Notifications
      else
        NullInstrumenter
      end
    end

    # Emits an event named "<event_name>.racecar" with the default payload
    # merged under the event-specific payload. Yields to the block (if any)
    # so the backend can time it.
    def instrument(event_name, payload = {}, &block)
      full_name = "#{event_name}.#{NAMESPACE}"
      @backend.instrument(full_name, @default_payload.merge(payload), &block)
    end
  end
end
# frozen_string_literal: true

require "forwardable"

module Racecar
  ##
  # Thin wrapper around an Rdkafka message that adds the number of retries
  # (`retries_count`) and renames a couple of accessors to the names Racecar
  # consumers historically used (`value`, `create_time`).
  class Message
    extend Forwardable

    # Number of times processing of this message has been retried; nil when
    # retry tracking is not in effect.
    attr_reader :retries_count

    # @param rdkafka_message [Rdkafka::Consumer::Message] the raw message
    # @param retries_count [Integer, nil] retry attempts so far
    def initialize(rdkafka_message, retries_count: nil)
      @rdkafka_message = rdkafka_message
      @retries_count = retries_count
    end

    # Pass these straight through to the underlying rdkafka message.
    def_delegators :@rdkafka_message, :topic, :partition, :offset, :key, :headers

    # The message body (rdkafka calls this `payload`).
    def value
      @rdkafka_message.payload
    end

    # The broker-assigned timestamp (rdkafka calls this `timestamp`).
    def create_time
      @rdkafka_message.timestamp
    end

    # Two Racecar messages are equal when they wrap the same rdkafka message.
    def ==(other)
      @rdkafka_message == other.instance_variable_get(:@rdkafka_message)
    end
  end
end
# frozen_string_literal: true

module Racecar
  ##
  # Tracks the pause state of a single topic/partition: whether it is
  # currently paused, for how long, and how many consecutive pauses have
  # occurred (used for exponential backoff).
  #
  # A nil timeout means "pause indefinitely" — expired? is then always false.
  class Pause
    # Number of consecutive pauses since the last reset!.
    attr_reader :pauses_count

    # @param timeout [Numeric, nil] base pause duration in seconds; nil pauses forever
    # @param max_timeout [Numeric, nil] cap on the backed-off duration
    # @param exponential_backoff [Boolean] double the duration on each pause
    def initialize(timeout: nil, max_timeout: nil, exponential_backoff: false)
      @started_at = nil
      @pauses_count = 0
      @timeout = timeout
      @max_timeout = max_timeout
      @exponential_backoff = exponential_backoff
    end

    # Marks the partition as paused and schedules when the pause ends.
    def pause!
      @started_at = Time.now
      @ends_at = @started_at + backoff_interval unless @timeout.nil?
      @pauses_count += 1
    end

    # Clears the pause (but keeps pauses_count for backoff purposes).
    def resume!
      @started_at = nil
      @ends_at = nil
    end

    def paused?
      !@started_at.nil?
    end

    # Seconds spent in the current pause; 0 when not paused.
    def pause_duration
      paused? ? Time.now - @started_at : 0
    end

    # Whether the scheduled pause window has elapsed. An indefinite pause
    # (nil timeout) never expires; a pause without a recorded end is
    # treated as already expired.
    def expired?
      return false if @timeout.nil?
      return true unless @ends_at

      Time.now >= @ends_at
    end

    # Forget the consecutive-pause count, e.g. after successful processing.
    def reset!
      @pauses_count = 0
    end

    # The duration of the next pause: timeout * 2**pauses_count when
    # exponential backoff is enabled, clamped to max_timeout when set.
    def backoff_interval
      return Float::INFINITY if @timeout.nil?

      factor = @exponential_backoff ? 2**@pauses_count : 1
      interval = factor * @timeout

      interval = @max_timeout if @max_timeout && interval > @max_timeout
      interval
    end
  end
end
# frozen_string_literal: true

require "rdkafka"
require "racecar/pause"
require "racecar/message"

module Racecar
  ##
  # Drives a consumer class: polls Kafka via a ConsumerSet, dispatches
  # messages to the processor's process/process_batch method, and handles
  # pausing/resuming partitions on processing errors.
  class Runner
    attr_reader :processor, :config, :logger

    # @param processor the consumer instance (responds to process or process_batch)
    # @param config [Racecar::Config]
    # @param logger [Logger] also installed as the rdkafka logger
    # @param instrumenter instrumentation backend (defaults to a no-op)
    def initialize(processor, config:, logger:, instrumenter: NullInstrumenter)
      @processor, @config, @logger = processor, config, logger
      @instrumenter = instrumenter
      @stop_requested = false
      Rdkafka::Config.logger = logger

      # Forward librdkafka statistics to the processor when it wants them.
      if processor.respond_to?(:statistics_callback)
        Rdkafka::Config.statistics_callback = processor.method(:statistics_callback).to_proc
      end

      setup_pauses
    end

    # Builds the lazy topic -> partition -> Pause lookup table from the
    # configured pause_timeout (-1 = indefinite, 0 = disabled, >0 = seconds).
    def setup_pauses
      timeout = if config.pause_timeout == -1
        nil
      elsif config.pause_timeout == 0
        # no op, handled elsewhere
      elsif config.pause_timeout > 0
        config.pause_timeout
      else
        raise ArgumentError, "Invalid value for pause_timeout: must be integer greater or equal -1"
      end

      @pauses = Hash.new {|h, k|
        h[k] = Hash.new {|h2, k2|
          h2[k2] = ::Racecar::Pause.new(
            timeout: timeout,
            max_timeout: config.max_pause_timeout,
            exponential_backoff: config.pause_with_exponential_backoff
          )
        }
      }
    end

    # Main entry point: installs signal handlers, wires up the processor,
    # then loops polling and processing until stop is requested, finishing
    # with a graceful shutdown (deliver pending messages, commit, leave group).
    def run
      install_signal_handlers
      @stop_requested = false

      # Configure the consumer with a producer so it can produce messages and
      # with a consumer so that it can support advanced use-cases.
      processor.configure(
        producer: producer,
        consumer: consumer,
        instrumenter: @instrumenter,
      )

      instrumentation_payload = {
        consumer_class: processor.class.to_s,
        consumer_set: consumer
      }

      # Main loop
      loop do
        break if @stop_requested
        resume_paused_partitions
        @instrumenter.instrument("main_loop", instrumentation_payload) do
          case process_method
          when :batch then
            msg_per_part = consumer.batch_poll(config.max_wait_time_ms).group_by(&:partition)
            msg_per_part.each_value do |messages|
              process_batch(messages)
            end
          when :single then
            message = consumer.poll(config.max_wait_time_ms)
            process(message) if message
          end
        end
      end

      logger.info "Gracefully shutting down"
      processor.deliver!
      processor.teardown
      consumer.commit
      @instrumenter.instrument('leave_group') do
        consumer.close
      end
    end

    # Request termination of the main loop; checked once per iteration.
    def stop
      @stop_requested = true
    end

    private

    attr_reader :pauses

    # :batch when the processor implements process_batch, :single when it
    # implements process; raises otherwise. Memoized.
    def process_method
      @process_method ||= begin
        case
        when processor.respond_to?(:process_batch) then :batch
        when processor.respond_to?(:process) then :single
        else
          raise NotImplementedError, "Consumer class must implement process or process_batch method"
        end
      end
    end

    def consumer
      @consumer ||= begin
        # Manually store offset after messages have been processed successfully
        # to avoid marking failed messages as committed. The call just updates
        # a value within librdkafka and is asynchronously written to proper
        # storage through auto commits.
        config.consumer << "enable.auto.offset.store=false"
        ConsumerSet.new(config, logger, @instrumenter)
      end
    end

    def producer
      @producer ||= Rdkafka::Config.new(producer_config).producer.tap do |producer|
        producer.delivery_callback = delivery_callback
      end
    end

    def producer_config
      # https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
      producer_config = {
        "bootstrap.servers" => config.brokers.join(","),
        "client.id"         => config.client_id,
        "statistics.interval.ms" => 1000,
      }
      producer_config["compression.codec"] = config.producer_compression_codec.to_s unless config.producer_compression_codec.nil?
      producer_config.merge!(config.rdkafka_producer)
      producer_config
    end

    # Instrumented acknowledgement of produced messages.
    def delivery_callback
      ->(delivery_report) do
        payload = {
          offset: delivery_report.offset,
          partition: delivery_report.partition
        }
        @instrumenter.instrument("acknowledged_message", payload)
      end
    end

    def install_signal_handlers
      # Stop the consumer on SIGINT, SIGQUIT or SIGTERM.
      trap("QUIT") { stop }
      trap("INT") { stop }
      trap("TERM") { stop }

      # Print the consumer config to STDERR on USR1.
      trap("USR1") { $stderr.puts config.inspect }
    end

    # Processes a single message inside instrumentation and pause handling;
    # stores the offset only after successful processing.
    def process(message)
      instrumentation_payload = {
        consumer_class: processor.class.to_s,
        topic: message.topic,
        partition: message.partition,
        offset: message.offset,
        create_time: message.timestamp,
        key: message.key,
        value: message.payload,
        headers: message.headers
      }

      @instrumenter.instrument("start_process_message", instrumentation_payload)
      with_pause(message.topic, message.partition, message.offset..message.offset) do |pause|
        begin
          @instrumenter.instrument("process_message", instrumentation_payload) do
            processor.process(Racecar::Message.new(message, retries_count: pause.pauses_count))
            processor.deliver!
            consumer.store_offset(message)
          end
        rescue => e
          instrumentation_payload[:retries_count] = pause.pauses_count
          config.error_handler.call(e, instrumentation_payload)
          raise e
        end
      end
    end

    # Processes a batch of messages (all from one partition); stores the
    # offset of the last message only after the whole batch succeeds.
    def process_batch(messages)
      first, last = messages.first, messages.last
      instrumentation_payload = {
        consumer_class: processor.class.to_s,
        topic: first.topic,
        partition: first.partition,
        first_offset: first.offset,
        last_offset: last.offset,
        last_create_time: last.timestamp,
        message_count: messages.size
      }

      @instrumenter.instrument("start_process_batch", instrumentation_payload)
      @instrumenter.instrument("process_batch", instrumentation_payload) do
        with_pause(first.topic, first.partition, first.offset..last.offset) do |pause|
          begin
            racecar_messages = messages.map do |message|
              Racecar::Message.new(message, retries_count: pause.pauses_count)
            end
            processor.process_batch(racecar_messages)
            processor.deliver!
            consumer.store_offset(messages.last)
          rescue => e
            instrumentation_payload[:retries_count] = pause.pauses_count
            config.error_handler.call(e, instrumentation_payload)
            raise e
          end
        end
      end
    end

    # Yields the partition's Pause; on success clears the consecutive-pause
    # count, on error pauses the partition (unless pausing is disabled) and
    # re-raises nothing — the error is logged and swallowed here.
    def with_pause(topic, partition, offsets)
      pause = pauses[topic][partition]
      return yield pause if config.pause_timeout == 0

      begin
        yield pause
        # We've successfully processed a batch from the partition, so we can clear the pause.
        pauses[topic][partition].reset!
      rescue => e
        desc = "#{topic}/#{partition}"
        logger.error "Failed to process #{desc} at #{offsets}: #{e}"

        logger.warn "Pausing partition #{desc} for #{pause.backoff_interval} seconds"
        consumer.pause(topic, partition, offsets.first)
        pause.pause!
      end
    end

    # Resumes any paused partitions whose pause window has expired and
    # instruments the pause status of every tracked partition.
    def resume_paused_partitions
      return if config.pause_timeout == 0

      pauses.each do |topic, partitions|
        partitions.each do |partition, pause|
          instrumentation_payload = {
            topic: topic,
            partition: partition,
            duration: pause.pause_duration,
            consumer_class: processor.class.to_s,
          }
          @instrumenter.instrument("pause_status", instrumentation_payload)

          if pause.paused? && pause.expired?
            logger.info "Automatically resuming partition #{topic}/#{partition}, pause timeout expired"
            consumer.resume(topic, partition)
            pause.resume!
            # TODO: # During re-balancing we might have lost the paused partition. Check if partition is still in group before seek. ?
          end
        end
      end
    end
  end
end