racecar 0.5.0.beta2 → 2.2.0

This diff shows the changes between two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
@@ -0,0 +1,28 @@
+ # frozen_string_literal: true
+
+ module Racecar
+   ##
+   # Common API for instrumentation to standardize
+   # namespace and default payload
+   #
+   class Instrumenter
+     NAMESPACE = "racecar"
+     attr_reader :backend
+
+     def initialize(default_payload = {})
+       @default_payload = default_payload
+
+       @backend = if defined?(ActiveSupport::Notifications)
+         # ActiveSupport needs `concurrent-ruby` but doesn't `require` it.
+         require 'concurrent/utility/monotonic_time'
+         ActiveSupport::Notifications
+       else
+         NullInstrumenter
+       end
+     end
+
+     def instrument(event_name, payload = {}, &block)
+       @backend.instrument("#{event_name}.#{NAMESPACE}", @default_payload.merge(payload), &block)
+     end
+   end
+ end
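The `Instrumenter` above suffixes every event name with the `racecar` namespace, so an event instrumented as `process_message` reaches subscribers as `process_message.racecar`, with the instrumenter's default payload merged in. A minimal sketch of listening for these events in an app that loads ActiveSupport (the subscriber below is illustrative, not part of this diff):

```ruby
require "active_support/notifications"

# Subscribe to every event published under the "racecar" namespace.
ActiveSupport::Notifications.subscribe(/\.racecar\z/) do |name, started, finished, _id, payload|
  duration_ms = ((finished - started) * 1000).round(1)
  puts "#{name} took #{duration_ms}ms (#{payload[:consumer_class]})"
end
```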
@@ -0,0 +1,30 @@
+ # frozen_string_literal: true
+
+ require "forwardable"
+
+ module Racecar
+   class Message
+     extend Forwardable
+
+     attr_reader :retries_count
+
+     def initialize(rdkafka_message, retries_count: nil)
+       @rdkafka_message = rdkafka_message
+       @retries_count = retries_count
+     end
+
+     def_delegators :@rdkafka_message, :topic, :partition, :offset, :key, :headers
+
+     def value
+       @rdkafka_message.payload
+     end
+
+     def create_time
+       @rdkafka_message.timestamp
+     end
+
+     def ==(other)
+       @rdkafka_message == other.instance_variable_get(:@rdkafka_message)
+     end
+   end
+ end
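`Racecar::Message` is a thin adapter over an rdkafka message: `topic`, `partition`, `offset`, `key`, and `headers` are delegated verbatim, while rdkafka's `payload` and `timestamp` are exposed under the ruby-kafka-style names `value` and `create_time`. A hypothetical illustration, using a Struct as a stand-in for the rdkafka message:

```ruby
require "racecar/message"

# Stand-in for Rdkafka::Consumer::Message -- illustrative only.
StubKafkaMessage = Struct.new(:topic, :partition, :offset, :key, :headers, :payload, :timestamp)

raw     = StubKafkaMessage.new("greetings", 0, 42, "user-1", {}, "hello", Time.now)
message = Racecar::Message.new(raw, retries_count: 0)

message.value       # => "hello"  (rdkafka's #payload)
message.create_time # rdkafka's #timestamp
message.offset      # => 42      (delegated via Forwardable)
```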
@@ -0,0 +1,10 @@
+ # frozen_string_literal: true
+
+ module Racecar
+   # Ignores all instrumentation events.
+   class NullInstrumenter
+     def self.instrument(*)
+       yield({}) if block_given?
+     end
+   end
+ end
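This null-object pattern means calling code never has to check whether instrumentation is available; without ActiveSupport the call is a no-op that still runs the block once:

```ruby
# Behaves like an instrument call, but publishes nothing.
Racecar::NullInstrumenter.instrument("process_message.racecar", topic: "greetings") do |payload|
  payload # => {} -- the block is yielded an empty payload and runs exactly once
end
```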
@@ -0,0 +1,59 @@
+ # frozen_string_literal: true
+
+ module Racecar
+   class Pause
+     attr_reader :pauses_count
+
+     def initialize(timeout: nil, max_timeout: nil, exponential_backoff: false)
+       @started_at = nil
+       @pauses_count = 0
+       @timeout = timeout
+       @max_timeout = max_timeout
+       @exponential_backoff = exponential_backoff
+     end
+
+     def pause!
+       @started_at = Time.now
+       @ends_at = @started_at + backoff_interval unless @timeout.nil?
+       @pauses_count += 1
+     end
+
+     def resume!
+       @started_at = nil
+       @ends_at = nil
+     end
+
+     def paused?
+       !@started_at.nil?
+     end
+
+     def pause_duration
+       if paused?
+         Time.now - @started_at
+       else
+         0
+       end
+     end
+
+     def expired?
+       return false if @timeout.nil?
+       return true unless @ends_at
+       Time.now >= @ends_at
+     end
+
+     def reset!
+       @pauses_count = 0
+     end
+
+     def backoff_interval
+       return Float::INFINITY if @timeout.nil?
+
+       backoff_factor = @exponential_backoff ? 2**@pauses_count : 1
+       timeout = backoff_factor * @timeout
+
+       timeout = @max_timeout if @max_timeout && timeout > @max_timeout
+
+       timeout
+     end
+   end
+ end
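`Pause#backoff_interval` multiplies the base timeout by `2**pauses_count` when exponential backoff is enabled, clamped to `max_timeout`; a `nil` timeout means pause indefinitely. A worked example (values illustrative):

```ruby
require "racecar/pause"

pause = Racecar::Pause.new(timeout: 10, max_timeout: 60, exponential_backoff: true)

pause.backoff_interval # => 10  (2**0 * 10)
pause.pause!
pause.backoff_interval # => 20  (2**1 * 10)
pause.pause!
pause.backoff_interval # => 40  (2**2 * 10)
pause.pause!
pause.backoff_interval # => 60  (2**3 * 10 = 80, clamped to max_timeout)
pause.reset!           # a successful batch clears pauses_count again
```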
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  module Racecar
    module RailsConfigFileLoader
      def self.load!
@@ -1,148 +1,257 @@
- require "kafka"
+ # frozen_string_literal: true
+
+ require "rdkafka"
+ require "racecar/pause"
+ require "racecar/message"

  module Racecar
    class Runner
-     attr_reader :processor, :config, :logger, :consumer
+     attr_reader :processor, :config, :logger

      def initialize(processor, config:, logger:, instrumenter: NullInstrumenter)
        @processor, @config, @logger = processor, config, logger
        @instrumenter = instrumenter
+       @stop_requested = false
+       Rdkafka::Config.logger = logger
+
+       if processor.respond_to?(:statistics_callback)
+         Rdkafka::Config.statistics_callback = processor.method(:statistics_callback).to_proc
+       end
+
+       setup_pauses
      end

-     def stop
-       Thread.new do
-         processor.teardown
-         consumer.stop unless consumer.nil?
-       end.join
+     def setup_pauses
+       timeout = if config.pause_timeout == -1
+         nil
+       elsif config.pause_timeout == 0
+         # no op, handled elsewhere
+       elsif config.pause_timeout > 0
+         config.pause_timeout
+       else
+         raise ArgumentError, "Invalid value for pause_timeout: must be an integer greater than or equal to -1"
+       end
+
+       @pauses = Hash.new { |h, k|
+         h[k] = Hash.new { |h2, k2|
+           h2[k2] = ::Racecar::Pause.new(
+             timeout: timeout,
+             max_timeout: config.max_pause_timeout,
+             exponential_backoff: config.pause_with_exponential_backoff
+           )
+         }
+       }
      end

      def run
-       kafka = Kafka.new(
-         client_id: config.client_id,
-         seed_brokers: config.brokers,
-         logger: logger,
-         connect_timeout: config.connect_timeout,
-         socket_timeout: config.socket_timeout,
-         ssl_ca_cert: config.ssl_ca_cert,
-         ssl_ca_cert_file_path: config.ssl_ca_cert_file_path,
-         ssl_client_cert: config.ssl_client_cert,
-         ssl_client_cert_key: config.ssl_client_cert_key,
-         sasl_plain_username: config.sasl_plain_username,
-         sasl_plain_password: config.sasl_plain_password,
-         sasl_scram_username: config.sasl_scram_username,
-         sasl_scram_password: config.sasl_scram_password,
-         sasl_scram_mechanism: config.sasl_scram_mechanism,
-         sasl_over_ssl: config.sasl_over_ssl,
-         ssl_ca_certs_from_system: config.ssl_ca_certs_from_system,
-       )
+       install_signal_handlers
+       @stop_requested = false

-       @consumer = kafka.consumer(
-         group_id: config.group_id,
-         offset_commit_interval: config.offset_commit_interval,
-         offset_commit_threshold: config.offset_commit_threshold,
-         session_timeout: config.session_timeout,
-         heartbeat_interval: config.heartbeat_interval,
-         offset_retention_time: config.offset_retention_time,
-         fetcher_max_queue_size: config.max_fetch_queue_size,
+       # Configure the processor with a producer so it can produce messages, and
+       # with a consumer so it can support advanced use-cases.
+       processor.configure(
+         producer: producer,
+         consumer: consumer,
+         instrumenter: @instrumenter,
        )

+       instrumentation_payload = {
+         consumer_class: processor.class.to_s,
+         consumer_set: consumer
+       }
+
+       # Main loop
+       loop do
+         break if @stop_requested
+         resume_paused_partitions
+         @instrumenter.instrument("main_loop", instrumentation_payload) do
+           case process_method
+           when :batch then
+             msg_per_part = consumer.batch_poll(config.max_wait_time_ms).group_by(&:partition)
+             msg_per_part.each_value do |messages|
+               process_batch(messages)
+             end
+           when :single then
+             message = consumer.poll(config.max_wait_time_ms)
+             process(message) if message
+           end
+         end
+       end
+
+       logger.info "Gracefully shutting down"
+       processor.deliver!
+       processor.teardown
+       consumer.commit
+       @instrumenter.instrument('leave_group') do
+         consumer.close
+       end
+     end
+
+     def stop
+       @stop_requested = true
+     end
+
+     private
+
+     attr_reader :pauses
+
+     def process_method
+       @process_method ||= begin
+         case
+         when processor.respond_to?(:process_batch) then :batch
+         when processor.respond_to?(:process) then :single
+         else
+           raise NotImplementedError, "Consumer class must implement process or process_batch method"
+         end
+       end
+     end
+
+     def consumer
+       @consumer ||= begin
+         # Manually store offsets after messages have been processed successfully
+         # to avoid marking failed messages as committed. The call just updates
+         # a value within librdkafka and is asynchronously written to proper
+         # storage through auto commits.
+         config.consumer << "enable.auto.offset.store=false"
+         ConsumerSet.new(config, logger, @instrumenter)
+       end
+     end
+
+     def producer
+       @producer ||= Rdkafka::Config.new(producer_config).producer.tap do |producer|
+         producer.delivery_callback = delivery_callback
+       end
+     end
+
+     def producer_config
+       # https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
+       producer_config = {
+         "bootstrap.servers"      => config.brokers.join(","),
+         "client.id"              => config.client_id,
+         "statistics.interval.ms" => 1000,
+       }
+       producer_config["compression.codec"] = config.producer_compression_codec.to_s unless config.producer_compression_codec.nil?
+       producer_config.merge!(config.rdkafka_producer)
+       producer_config
+     end
+
+     def delivery_callback
+       ->(delivery_report) do
+         payload = {
+           offset: delivery_report.offset,
+           partition: delivery_report.partition
+         }
+         @instrumenter.instrument("acknowledged_message", payload)
+       end
+     end
+
+     def install_signal_handlers
        # Stop the consumer on SIGINT, SIGQUIT or SIGTERM.
        trap("QUIT") { stop }
-       trap("INT") { stop }
+       trap("INT")  { stop }
        trap("TERM") { stop }

        # Print the consumer config to STDERR on USR1.
        trap("USR1") { $stderr.puts config.inspect }
+     end

-       config.subscriptions.each do |subscription|
-         consumer.subscribe(
-           subscription.topic,
-           start_from_beginning: subscription.start_from_beginning,
-           max_bytes_per_partition: subscription.max_bytes_per_partition,
-         )
-       end
+     def process(message)
+       instrumentation_payload = {
+         consumer_class: processor.class.to_s,
+         topic: message.topic,
+         partition: message.partition,
+         offset: message.offset,
+         create_time: message.timestamp,
+         key: message.key,
+         value: message.payload,
+         headers: message.headers
+       }

-       # Configure the consumer with a producer so it can produce messages.
-       producer = kafka.producer(
-         compression_codec: config.producer_compression_codec,
-       )
+       @instrumenter.instrument("start_process_message", instrumentation_payload)
+       with_pause(message.topic, message.partition, message.offset..message.offset) do |pause|
+         begin
+           @instrumenter.instrument("process_message", instrumentation_payload) do
+             processor.process(Racecar::Message.new(message, retries_count: pause.pauses_count))
+             processor.deliver!
+             consumer.store_offset(message)
+           end
+         rescue => e
+           instrumentation_payload[:retries_count] = pause.pauses_count
+           config.error_handler.call(e, instrumentation_payload)
+           raise e
+         end
+       end
+     end

-       processor.configure(consumer: consumer, producer: producer)
+     def process_batch(messages)
+       first, last = messages.first, messages.last
+       instrumentation_payload = {
+         consumer_class: processor.class.to_s,
+         topic: first.topic,
+         partition: first.partition,
+         first_offset: first.offset,
+         last_offset: last.offset,
+         last_create_time: last.timestamp,
+         message_count: messages.size
+       }

-       begin
-         if processor.respond_to?(:process)
-           consumer.each_message(max_wait_time: config.max_wait_time, max_bytes: config.max_bytes) do |message|
-             payload = {
-               consumer_class: processor.class.to_s,
-               topic: message.topic,
-               partition: message.partition,
-               offset: message.offset,
-             }
-
-             @instrumenter.instrument("process_message.racecar", payload) do
-               processor.process(message)
-               producer.deliver_messages
+       @instrumenter.instrument("start_process_batch", instrumentation_payload)
+       @instrumenter.instrument("process_batch", instrumentation_payload) do
+         with_pause(first.topic, first.partition, first.offset..last.offset) do |pause|
+           begin
+             racecar_messages = messages.map do |message|
+               Racecar::Message.new(message, retries_count: pause.pauses_count)
              end
+             processor.process_batch(racecar_messages)
+             processor.deliver!
+             consumer.store_offset(messages.last)
+           rescue => e
+             instrumentation_payload[:retries_count] = pause.pauses_count
+             config.error_handler.call(e, instrumentation_payload)
+             raise e
            end
-         elsif processor.respond_to?(:process_batch)
-           consumer.each_batch(max_wait_time: config.max_wait_time, max_bytes: config.max_bytes) do |batch|
-             payload = {
-               consumer_class: processor.class.to_s,
-               topic: batch.topic,
-               partition: batch.partition,
-               first_offset: batch.first_offset,
-               message_count: batch.messages.count,
-             }
-
-             @instrumenter.instrument("process_batch.racecar", payload) do
-               processor.process_batch(batch)
-               producer.deliver_messages
-             end
-           end
-         else
-           raise NotImplementedError, "Consumer class must implement process or process_batch method"
-         end
-       rescue Kafka::ProcessingError => e
-         @logger.error "Error processing partition #{e.topic}/#{e.partition} at offset #{e.offset}"
-
-         if config.pause_timeout > 0
-           # Pause fetches from the partition. We'll continue processing the other partitions in the topic.
-           # The partition is automatically resumed after the specified timeout, and will continue where we
-           # left off.
-           @logger.warn "Pausing partition #{e.topic}/#{e.partition} for #{config.pause_timeout} seconds"
-           consumer.pause(
-             e.topic,
-             e.partition,
-             timeout: config.pause_timeout,
-             max_timeout: config.max_pause_timeout,
-             exponential_backoff: config.pause_with_exponential_backoff?,
-           )
-         elsif config.pause_timeout == -1
-           # A pause timeout of -1 means indefinite pausing, which in ruby-kafka is done by passing nil as
-           # the timeout.
-           @logger.warn "Pausing partition #{e.topic}/#{e.partition} indefinitely, or until the process is restarted"
-           consumer.pause(e.topic, e.partition, timeout: nil)
          end
+       end
+     end

-         config.error_handler.call(e.cause, {
-           topic: e.topic,
-           partition: e.partition,
-           offset: e.offset,
-         })
+     def with_pause(topic, partition, offsets)
+       pause = pauses[topic][partition]
+       return yield pause if config.pause_timeout == 0

-         # Restart the consumer loop.
-         retry
-       rescue Kafka::InvalidSessionTimeout
-         raise ConfigError, "`session_timeout` is set either too high or too low"
-       rescue Kafka::Error => e
-         error = "#{e.class}: #{e.message}\n" + e.backtrace.join("\n")
-         @logger.error "Consumer thread crashed: #{error}"
+       begin
+         yield pause
+         # We've successfully processed a batch from the partition, so we can clear the pause.
+         pauses[topic][partition].reset!
+       rescue => e
+         desc = "#{topic}/#{partition}"
+         logger.error "Failed to process #{desc} at #{offsets}: #{e}"

-         config.error_handler.call(e)
+         logger.warn "Pausing partition #{desc} for #{pause.backoff_interval} seconds"
+         consumer.pause(topic, partition, offsets.first)
+         pause.pause!
+       end
+     end

-         raise
-       else
-         @logger.info "Gracefully shutting down"
+     def resume_paused_partitions
+       return if config.pause_timeout == 0
+
+       pauses.each do |topic, partitions|
+         partitions.each do |partition, pause|
+           instrumentation_payload = {
+             topic: topic,
+             partition: partition,
+             duration: pause.pause_duration,
+             consumer_class: processor.class.to_s,
+           }
+           @instrumenter.instrument("pause_status", instrumentation_payload)
+
+           if pause.paused? && pause.expired?
+             logger.info "Automatically resuming partition #{topic}/#{partition}, pause timeout expired"
+             consumer.resume(topic, partition)
+             pause.resume!
+             # TODO: During a rebalance we might have lost the paused partition. Check that the partition is still assigned before seeking.
+           end
+         end
        end
      end
    end
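The rewritten `Runner` selects its polling mode by duck-typing the consumer class: defining `process_batch` selects `:batch` (messages are fetched with `batch_poll` and grouped by partition), otherwise `process` selects `:single`. A sketch of the two consumer styles this run loop dispatches to (class and topic names are illustrative):

```ruby
class GreetingsConsumer < Racecar::Consumer
  subscribes_to "greetings"

  # Runner#process_method resolves to :single for this class.
  def process(message)
    puts "#{message.topic}/#{message.partition}@#{message.offset}: #{message.value}"
  end
end

class GreetingsBatchConsumer < Racecar::Consumer
  subscribes_to "greetings"

  # Defining process_batch makes the Runner poll whole batches instead.
  def process_batch(messages)
    messages.each { |message| puts message.value }
  end
end
```

Each message handed to these methods is a `Racecar::Message`, so `retries_count` reflects how many times the partition has been paused while retrying the same offsets.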