racecar 2.0.0 → 2.10.0.beta2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +17 -0
  3. data/.github/workflows/ci.yml +46 -0
  4. data/.github/workflows/publish.yml +12 -0
  5. data/.gitignore +1 -2
  6. data/CHANGELOG.md +83 -1
  7. data/Dockerfile +9 -0
  8. data/Gemfile +6 -0
  9. data/Gemfile.lock +72 -0
  10. data/README.md +303 -82
  11. data/Rakefile +5 -0
  12. data/docker-compose.yml +65 -0
  13. data/examples/batch_consumer.rb +4 -2
  14. data/examples/cat_consumer.rb +2 -0
  15. data/examples/producing_consumer.rb +2 -0
  16. data/exe/racecar +37 -14
  17. data/extra/datadog-dashboard.json +1 -0
  18. data/lib/ensure_hash_compact.rb +2 -0
  19. data/lib/generators/racecar/consumer_generator.rb +2 -0
  20. data/lib/generators/racecar/install_generator.rb +2 -0
  21. data/lib/racecar/cli.rb +26 -21
  22. data/lib/racecar/config.rb +80 -4
  23. data/lib/racecar/consumer.rb +51 -6
  24. data/lib/racecar/consumer_set.rb +113 -44
  25. data/lib/racecar/ctl.rb +31 -3
  26. data/lib/racecar/daemon.rb +4 -2
  27. data/lib/racecar/datadog.rb +83 -3
  28. data/lib/racecar/delivery_callback.rb +27 -0
  29. data/lib/racecar/erroneous_state_error.rb +34 -0
  30. data/lib/racecar/heroku.rb +49 -0
  31. data/lib/racecar/instrumenter.rb +4 -7
  32. data/lib/racecar/liveness_probe.rb +78 -0
  33. data/lib/racecar/message.rb +6 -1
  34. data/lib/racecar/message_delivery_error.rb +112 -0
  35. data/lib/racecar/null_instrumenter.rb +2 -0
  36. data/lib/racecar/parallel_runner.rb +110 -0
  37. data/lib/racecar/pause.rb +8 -4
  38. data/lib/racecar/producer.rb +139 -0
  39. data/lib/racecar/rails_config_file_loader.rb +7 -1
  40. data/lib/racecar/rebalance_listener.rb +58 -0
  41. data/lib/racecar/runner.rb +79 -37
  42. data/lib/racecar/version.rb +3 -1
  43. data/lib/racecar.rb +36 -8
  44. data/racecar.gemspec +7 -4
  45. metadata +47 -25
  46. data/.github/workflows/rspec.yml +0 -24
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+
3
module Racecar
  # MessageDeliveryError wraps an Rdkafka error and tries to give
  # specific hints on how to debug or resolve the error within the
  # Racecar context.
  class MessageDeliveryError < StandardError
    # Returns a human readable version of the partition a delivery handle
    # points at.
    #
    # @param delivery_handle [#create_result, nil] an rdkafka delivery handle;
    #   may be nil when producing failed before a handle was created.
    # @return [String] the partition number as a string, or "not yet known"
    #   when the handle, its result or the partition is missing, or when the
    #   partition is still -1.
    def self.partition_from_delivery_handle(delivery_handle)
      partition = delivery_handle&.create_result&.partition
      # -1 is rdkafka-ruby's default value, which gets eventually set by librdkafka
      return "not yet known" if partition.nil? || partition == -1

      partition.to_s
    end

    # @param rdkafka_error [Rdkafka::RdkafkaError] the error to wrap. Anything
    #   else is raised as-is instead of being wrapped.
    # @param delivery_handle [Object, nil] the handle of the failed delivery,
    #   only used to report the partition in the explanation.
    def initialize(rdkafka_error, delivery_handle)
      raise rdkafka_error unless rdkafka_error.is_a?(Rdkafka::RdkafkaError)

      @rdkafka_error = rdkafka_error
      @delivery_handle = delivery_handle
    end

    attr_reader :rdkafka_error

    # @return [Object] the error code of the wrapped rdkafka error.
    def code
      @rdkafka_error.code
    end

    # @return [String] the wrapped error's message followed by a
    #   Racecar specific explanation for its error code.
    def to_s
      <<~EOM
        Message delivery finally failed:
        #{@rdkafka_error}

        #{explain}
      EOM
    end

    private

    # Picks the hint text matching the wrapped error's code; unknown codes
    # get a generic pointer to the upstream documentation.
    def explain
      case @rdkafka_error.code
      when :msg_timed_out # -192
        <<~EOM
          Could not deliver message within Racecar.config.message_timeout.

          This can happen for various reasons, but most commonly because the connection to the broker is interrupted or there is no leader available. Check the broker's logs or the network for more insight.

          Upstream documentation:
          https://github.com/edenhill/librdkafka/blob/master/INTRODUCTION.md#error-local-time-out
        EOM

      when :msg_size_too_large # 10
        <<~EOM
          Could not deliver message, since it is bigger than either the broker's or Racecar's maximum message size.

          The broker's config option on the topic is called "max.message.bytes" and the broker wide default is "message.max.bytes". The client's is "message.max.bytes". Take extra care to distinguish this from similarly named properties for receiving/consuming messages (i.e. Racecar.config.max_bytes is NOT related).

          Racecar's limit is currently not configurable and uses librdkafka's default of 1 MB (10⁶ bytes). As of writing, librdkafka will send at least one message regardless of this limit. It is therefore very likely you're hitting the broker's limit and not Racecar's/librdkafka's.

          Upstream documentation:
          broker per topic: https://docs.confluent.io/platform/current/installation/configuration/topic-configs.html#topicconfigs_max.message.bytes
          broker default: https://docs.confluent.io/platform/current/installation/configuration/broker-configs.html#brokerconfigs_message.max.bytes
          client: https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
        EOM

      when :unknown_topic_or_part # 3
        partition = self.class.partition_from_delivery_handle(@delivery_handle)

        <<~EOM
          Could not deliver message, since the targeted topic or partition (#{partition}) does not exist.

          Check that there are no typos, or that the broker's "auto.create.topics.enable" is enabled. For freshly created topics with auto create enabled, this may appear in the beginning (race condition on creation and publishing).

          Upstream documentation:
          broker setting: https://docs.confluent.io/platform/current/installation/configuration/broker-configs.html#brokerconfigs_auto.create.topics.enable
          client: https://github.com/edenhill/librdkafka/blob/master/INTRODUCTION.md#topic-metadata-propagation-for-newly-created-topics
          https://github.com/edenhill/librdkafka/blob/master/INTRODUCTION.md#topic-auto-creation
        EOM

      when :record_list_too_large # 18
        <<~EOM
          Tried to deliver more messages in a batch than the broker's segment size.

          Either increase the broker's "log.segment.bytes", or decrease any of the client's related settings "batch.num.messages", "batch.size" or "message.max.bytes". None of these are configurable through Racecar yet, as the defaults should be sufficient and sane.

          Upstream documentation:
          broker: https://docs.confluent.io/platform/current/installation/configuration/broker-configs.html#brokerconfigs_log.segment.bytes
          client: https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
        EOM

      when :topic_authorization_failed # 29
        <<~EOM
          Failed to deliver message because of insufficient authorization to write into the topic.

          Double check that it is not a race condition on topic creation. If it isn't, verify the ACLs are correct.

          Upstream documentation:
          https://github.com/edenhill/librdkafka/blob/master/INTRODUCTION.md#unknown-or-unauthorized-topics
        EOM

      else
        <<~EOM
          No specific information is available for this error. Consider adding it to Racecar. You can find generally helpful information in the upstream documentation:
          https://github.com/edenhill/librdkafka/blob/master/INTRODUCTION.md
          https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
        EOM
      end
    end
  end
end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Racecar
2
4
  # Ignores all instrumentation events.
3
5
  class NullInstrumenter
@@ -0,0 +1,110 @@
1
+ # frozen_string_literal: true
2
+
3
module Racecar
  # Runs the given runner in `config.parallel_workers` forked child processes
  # and supervises them from the parent process: shutdown signals are forwarded
  # to the children and an exception raised in a child is re-raised in the
  # parent once all children have exited.
  class ParallelRunner
    # pid: process id of the forked child.
    # parent_reader: read end of the pipe the child uses to report an exception.
    Worker = Struct.new(:pid, :parent_reader)

    SHUTDOWN_SIGNALS = ["INT", "QUIT", "TERM"].freeze

    # @param runner [#run] executed inside every forked worker.
    # @param config [#parallel_workers] supplies the number of workers to fork.
    # @param logger [#info, #debug]
    def initialize(runner:, config:, logger:)
      @runner = runner
      @config = config
      @logger = logger
      # Initialised up front so that #worker_pids, #running? and #stop are
      # safe to call before #run.
      @workers = []
      @running = false
      @terminating = false
    end

    # @return [Array<Integer>] pids of the forked workers (empty before #run).
    def worker_pids
      workers.map(&:pid)
    end

    # @return [Boolean] true once #run has forked the workers and installed
    #   the signal handlers.
    def running?
      @running
    end

    # Forks the workers, installs signal handlers and blocks until the workers
    # have exited. Re-raises an exception reported by a worker.
    def run
      logger.info "=> Running with #{config.parallel_workers} parallel workers"

      self.workers = config.parallel_workers.times.map do
        run_worker.tap { |w| logger.info "=> Forked new Racecar consumer with process id #{w.pid}" }
      end

      # Print the consumer config to STDERR on USR1.
      trap("USR1") { $stderr.puts config.inspect }

      SHUTDOWN_SIGNALS.each { |signal| trap(signal) { terminate_workers } }

      @running = true

      wait_for_exit
    end

    # Asks all workers to terminate. Does nothing when no workers were forked.
    def stop
      terminate_workers
    end

    private

    attr_accessor :workers
    attr_reader :runner, :config, :logger

    # Forks a single worker that executes the runner.
    #
    # @return [Worker] the child's pid plus the parent's end of the pipe.
    def run_worker
      parent_reader, child_writer = IO.pipe

      pid = fork do
        begin
          parent_reader.close

          runner.run
        rescue Exception => e
          # Allow the parent process to re-raise the exception after shutdown
          child_writer.binmode
          child_writer.write(Marshal.dump(e))
        ensure
          child_writer.close
        end
      end

      # The parent only reads; closing its copy of the writer lets the reader
      # see EOF as soon as the child closes its end.
      child_writer.close

      Worker.new(pid, parent_reader)
    end

    # Sends TERM to every worker, at most once.
    def terminate_workers
      # Without workers there is nothing to signal. Not marking the runner as
      # terminating here keeps a later shutdown (after #run) functional.
      return if @terminating || workers.empty?

      @terminating = true
      $stderr.puts "=> Terminating workers"

      Process.kill("TERM", *workers.map(&:pid))
    end

    def wait_for_exit
      # The call to IO.select blocks until one or more of our readers are ready for reading,
      # which could be for one of three reasons:
      #
      # - An exception is raised in the child process, in which case we should initiate
      #   a shutdown;
      #
      # - A graceful shutdown was already initiated, and the pipe writer has been closed, in
      #   which case there is nothing more to do.
      #
      # - One of the child processes was killed somehow. If this turns out to be too strict
      #   (i.e. closing down all the workers), we could revisit and look at restarting dead
      #   workers.
      #
      ready_readers = IO.select(workers.map(&:parent_reader)).first

      first_read = ready_readers.first.read

      terminate_workers

      workers.map(&:pid).each do |pid|
        logger.debug "=> Waiting for worker with pid #{pid} to exit"
        Process.waitpid(pid)
        logger.debug "=> Worker with pid #{pid} shutdown"
      end

      # All children are gone; release the parent's pipe ends.
      workers.each { |worker| worker.parent_reader.close }

      # The payload was written by our own forked child, so unmarshalling it
      # is not an untrusted-input concern.
      exception_found = !first_read.empty?
      raise Marshal.load(first_read) if exception_found
    end
  end
end
data/lib/racecar/pause.rb CHANGED
@@ -1,8 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Racecar
2
4
  class Pause
5
+ attr_reader :pauses_count
6
+
3
7
  def initialize(timeout: nil, max_timeout: nil, exponential_backoff: false)
4
8
  @started_at = nil
5
- @pauses = 0
9
+ @pauses_count = 0
6
10
  @timeout = timeout
7
11
  @max_timeout = max_timeout
8
12
  @exponential_backoff = exponential_backoff
@@ -11,7 +15,7 @@ module Racecar
11
15
  def pause!
12
16
  @started_at = Time.now
13
17
  @ends_at = @started_at + backoff_interval unless @timeout.nil?
14
- @pauses += 1
18
+ @pauses_count += 1
15
19
  end
16
20
 
17
21
  def resume!
@@ -38,13 +42,13 @@ module Racecar
38
42
  end
39
43
 
40
44
  def reset!
41
- @pauses = 0
45
+ @pauses_count = 0
42
46
  end
43
47
 
44
48
  def backoff_interval
45
49
  return Float::INFINITY if @timeout.nil?
46
50
 
47
- backoff_factor = @exponential_backoff ? 2**@pauses : 1
51
+ backoff_factor = @exponential_backoff ? 2**@pauses_count : 1
48
52
  timeout = backoff_factor * @timeout
49
53
 
50
54
  timeout = @max_timeout if @max_timeout && timeout > @max_timeout
@@ -0,0 +1,139 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "racecar/message_delivery_error"
4
+ require "racecar/delivery_callback"
5
+
6
# Close the shared rdkafka producer when the process exits.
at_exit do
  Racecar::Producer.shutdown!
end

module Racecar
  # Wraps an rdkafka producer that is created once and shared by all
  # Producer instances.
  class Producer

    @@mutex = Mutex.new
    # The shared rdkafka producer, created lazily by #init_internal_producer.
    @@init_internal_producer = nil

    class << self
      # Closes the shared rdkafka producer, if one was created, and forgets it
      # so that a closed producer is never handed out again. Safe to call
      # repeatedly.
      def shutdown!
        @@mutex.synchronize do
          # The shared producer is memoized in @@init_internal_producer; a
          # class-level @internal_producer is never assigned anywhere.
          @@init_internal_producer&.close
          @@init_internal_producer = nil
        end
      end
    end

    # @param config [Object] read for brokers, client_id,
    #   statistics_interval_ms, message_timeout, producer_compression_codec
    #   and rdkafka_producer when the shared producer is first created.
    # @param logger [#warn, nil]
    # @param instrumenter [#instrument]
    def initialize(config: nil, logger: nil, instrumenter: NullInstrumenter)
      @config = config
      @logger = logger
      @delivery_handles = []
      @instrumenter = instrumenter
      @batching = false
      @internal_producer = init_internal_producer(config)
    end

    # Returns the shared rdkafka producer, building it on first use. Because
    # the result is memoized, the config and instrumenter of the first caller
    # are the ones that take effect.
    def init_internal_producer(config)
      @@mutex.synchronize do
        @@init_internal_producer ||= begin
          # https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
          producer_config = {
            "bootstrap.servers" => config.brokers.join(","),
            "client.id" => config.client_id,
            "statistics.interval.ms" => config.statistics_interval_ms,
            "message.timeout.ms" => config.message_timeout * 1000,
          }
          producer_config["compression.codec"] = config.producer_compression_codec.to_s unless config.producer_compression_codec.nil?
          producer_config.merge!(config.rdkafka_producer)
          Rdkafka::Config.new(producer_config).producer.tap do |producer|
            producer.delivery_callback = DeliveryCallback.new(instrumenter: @instrumenter)
          end
        end
      end
    end

    # fire and forget - you won't get any guarantees or feedback from
    # Racecar on the status of the message and it won't halt execution
    # of the rest of your code.
    #
    # @raise [MessageDeliveryError] when rdkafka rejects the message.
    # @return [nil]
    def produce_async(value:, topic:, **options)
      with_instrumentation(action: "produce_async", value: value, topic: topic, **options) do
        begin
          handle = internal_producer.produce(payload: value, topic: topic, **options)
          # Handles are only collected inside a #wait_for_delivery block.
          @delivery_handles << handle if @batching
        rescue Rdkafka::RdkafkaError => e
          raise MessageDeliveryError.new(e, handle)
        end
      end

      nil
    end

    # synchronous message production - will wait until the delivery handle succeeds, fails or times out.
    #
    # @raise [MessageDeliveryError] when producing or delivering fails.
    # @return [nil]
    def produce_sync(value:, topic:, **options)
      with_instrumentation(action: "produce_sync", value: value, topic: topic, **options) do
        begin
          handle = internal_producer.produce(payload: value, topic: topic, **options)
          deliver_with_error_handling(handle)
        rescue Rdkafka::RdkafkaError => e
          raise MessageDeliveryError.new(e, handle)
        end
      end

      nil
    end

    # Blocks until all messages that have been asynchronously produced in the block have been delivered.
    # Usage:
    # messages = [
    #         {value: "message1", topic: "topic1"},
    #         {value: "message2", topic: "topic1"},
    #         {value: "message3", topic: "topic2"}
    #     ]
    #     Racecar.wait_for_delivery {
    #       messages.each do |msg|
    #         Racecar.produce_async(value: msg[:value], topic: msg[:topic])
    #       end
    #     }
    def wait_for_delivery
      @batching = true
      @delivery_handles.clear
      yield
      @delivery_handles.each do |handle|
        deliver_with_error_handling(handle)
      end
    ensure
      # Always leave batching mode, even when the block or a delivery raised.
      @delivery_handles.clear
      @batching = false
    end

    private

    attr_reader :internal_producer

    # Waits for the handle to finish. A wait timeout is logged (when a logger
    # was given) and the wait is retried; any other rdkafka error is wrapped.
    def deliver_with_error_handling(handle)
      handle.wait
    rescue Rdkafka::AbstractHandle::WaitTimeoutError
      partition = MessageDeliveryError.partition_from_delivery_handle(handle)
      # The logger is optional (defaults to nil in #initialize).
      @logger&.warn "Still trying to deliver message to (partition #{partition})... (will try up to Racecar.config.message_timeout)"
      retry
    rescue Rdkafka::RdkafkaError => e
      raise MessageDeliveryError.new(e, handle)
    end

    # Runs the block inside an instrumentation event named after the action.
    def with_instrumentation(action:, value:, topic:, **options)
      message_size = value.respond_to?(:bytesize) ? value.bytesize : 0
      instrumentation_payload = {
        value: value,
        topic: topic,
        message_size: message_size,
        buffer_size: @delivery_handles.size,
        key: options.fetch(:key, nil),
        partition: options.fetch(:partition, nil),
        partition_key: options.fetch(:partition_key, nil)
      }
      @instrumenter.instrument(action, instrumentation_payload) do
        yield
      end
    end
  end
end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Racecar
2
4
  module RailsConfigFileLoader
3
5
  def self.load!
@@ -24,7 +26,11 @@ module Racecar
24
26
  console = ActiveSupport::Logger.new($stdout)
25
27
  console.formatter = Rails.logger.formatter
26
28
  console.level = Rails.logger.level
27
- Rails.logger.extend(ActiveSupport::Logger.broadcast(console))
29
+ if ::Rails::VERSION::STRING < "7.1"
30
+ Rails.logger.extend(ActiveSupport::Logger.broadcast(console))
31
+ else
32
+ Rails.logger = ActiveSupport::BroadcastLogger.new(Rails.logger, console)
33
+ end
28
34
  end
29
35
  end
30
36
  end
@@ -0,0 +1,58 @@
1
module Racecar
  # Receives rdkafka rebalance callbacks, wraps them in an Event, and hands
  # them to the consumer class inside an instrumentation event.
  class RebalanceListener
    # Describes a single rebalance callback for the consumer class.
    class Event
      # API private and not guaranteed stable
      attr_reader :__rdkafka_topic_partition_list, :__rdkafka_consumer

      def initialize(rdkafka_topic_partition_list:, rdkafka_consumer:)
        @__rdkafka_consumer = rdkafka_consumer
        @__rdkafka_topic_partition_list = rdkafka_topic_partition_list
      end

      # First topic name in the partition list, or nil when there is none.
      def topic_name
        topics = __rdkafka_topic_partition_list.to_h.keys
        topics.first
      end

      # Partition numbers of every entry in the partition list.
      def partition_numbers
        entries = __rdkafka_topic_partition_list.to_h.values.flatten
        entries.map(&:partition)
      end

      def empty?
        __rdkafka_topic_partition_list.empty?
      end
    end

    attr_writer :rdkafka_consumer

    def initialize(consumer_class, instrumenter)
      @rdkafka_consumer = nil
      @instrumenter = instrumenter
      @consumer_class = consumer_class
    end

    # Called with the partition list that was assigned to this consumer.
    def on_partitions_assigned(rdkafka_topic_partition_list)
      assigned = build_event(rdkafka_topic_partition_list)

      instrumenter.instrument("partitions_assigned", { partitions: assigned.partition_numbers }) do
        consumer_class.on_partitions_assigned(assigned)
      end
    end

    # Called with the partition list that was taken away from this consumer.
    def on_partitions_revoked(rdkafka_topic_partition_list)
      revoked = build_event(rdkafka_topic_partition_list)

      instrumenter.instrument("partitions_revoked", { partitions: revoked.partition_numbers }) do
        consumer_class.on_partitions_revoked(revoked)
      end
    end

    private

    attr_reader :consumer_class, :instrumenter, :rdkafka_consumer

    # Wraps the partition list together with the current rdkafka consumer.
    def build_event(partition_list)
      Event.new(rdkafka_topic_partition_list: partition_list, rdkafka_consumer: rdkafka_consumer)
    end
  end
end