racecar 2.12.0 → 3.0.0.alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a4e6b0c4d52637bd8c6bb2c5e3d69013889053973195a5d73ac5d5645047f985
4
- data.tar.gz: 23425d552932665cafa03cf245d3d4ef879e053e9e9c60d1eb172455f405f3f3
3
+ metadata.gz: 6e6acfbdd63c525020404c4b61e322a3ed5684920b2efc6740137be569ef07a3
4
+ data.tar.gz: fb3250f00850303fc86c87efbb303913e87e271ef6a60068d3cca4339f0b66c4
5
5
  SHA512:
6
- metadata.gz: 62d52be41cb3821a5d6534fea43deea8d02dd8a8f03d3449249be0fcdcbf5c842cbb2be8aac7065b97fb515b16370ab93f114ab44dd85dce6a22091d9ac04de4
7
- data.tar.gz: 006efc7649ddb4257a80e1f2400a62e9e9cba0fa5573d3947f47293d4d77b9b701d66ebf6d465cf91fbf209771e5699ddc9cd7df238ecddf7f54ffbab4eb0c15
6
+ metadata.gz: 9d2a751ce55f466f9ac9f88ec6862cfe1414a1ca3b5bf38ad8a93b5aef720ca88c1d49d11d6ecfdb9964174d3425fff0a476df3361bde50dc86f9b22a5e24930
7
+ data.tar.gz: cc6332078aa586f3defd3f8295b89f24cf889fdfbbea76200b4cb7f84aacbfdd17be4236cc59dbcf456cd4a94e381e11b09af4eabc96e9faad6efc6fddd03b71
@@ -0,0 +1,4 @@
1
+ # CODEOWNERS file
2
+ # This file defines who should review code changes in this repository.
3
+
4
+ * @zendesk/core-gem-owners
@@ -2,9 +2,9 @@ name: CI
2
2
 
3
3
  on:
4
4
  pull_request:
5
- branches: ["master"]
5
+ branches: ["main"]
6
6
  push:
7
- branches: ["master"]
7
+ branches: ["main"]
8
8
 
9
9
  jobs:
10
10
  unit-specs:
@@ -1,12 +1,26 @@
1
- name: Publish Gem
1
+ name: Publish to RubyGems.org
2
2
 
3
3
  on:
4
4
  push:
5
- tags: v*
5
+ branches: main
6
+ paths: lib/racecar/version.rb
7
+ workflow_dispatch:
6
8
 
7
9
  jobs:
8
- call-workflow:
9
- uses: zendesk/gw/.github/workflows/ruby-gem-publication.yml@main
10
- secrets:
11
- RUBY_GEMS_API_KEY: ${{ secrets.RUBY_GEMS_API_KEY }}
12
- RUBY_GEMS_TOTP_DEVICE: ${{ secrets.RUBY_GEMS_TOTP_DEVICE }}
10
+ publish:
11
+ runs-on: ubuntu-latest
12
+ environment: rubygems-publish
13
+ if: github.repository_owner == 'zendesk'
14
+ permissions:
15
+ id-token: write
16
+ contents: write
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+ - name: Set up Ruby
20
+ uses: ruby/setup-ruby@v1
21
+ with:
22
+ bundler-cache: false
23
+ ruby-version: "3.4"
24
+ - name: Install dependencies
25
+ run: bundle install
26
+ - uses: rubygems/release-gem@v1
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- racecar (2.12.0)
4
+ racecar (3.0.0.alpha.2)
5
5
  king_konf (~> 1.0.0)
6
6
  rdkafka (>= 0.15.0)
7
7
 
@@ -45,7 +45,7 @@ GEM
45
45
  byebug (~> 11.0)
46
46
  pry (>= 0.13, < 0.15)
47
47
  rake (13.0.6)
48
- rdkafka (0.18.0)
48
+ rdkafka (0.21.0)
49
49
  ffi (~> 1.15)
50
50
  mini_portile2 (~> 2.6)
51
51
  rake (> 12)
data/README.md CHANGED
@@ -418,7 +418,7 @@ Racecar supports [Datadog](https://www.datadoghq.com/) monitoring integration. I
418
418
  - `datadog_namespace` – The namespace to use for Datadog metrics.
419
419
  - `datadog_tags` – Tags that should always be set on Datadog metrics.
420
420
 
421
- Furthermore, there's a [standard Datadog dashboard configuration file](https://raw.githubusercontent.com/zendesk/racecar/master/extra/datadog-dashboard.json) that you can import to get started with a Racecar dashboard for all of your consumers.
421
+ Furthermore, there's a [standard Datadog dashboard configuration file](https://raw.githubusercontent.com/zendesk/racecar/main/extra/datadog-dashboard.json) that you can import to get started with a Racecar dashboard for all of your consumers.
422
422
 
423
423
  #### Consumers Without Rails
424
424
 
@@ -729,6 +729,22 @@ There can be behavioural inconsistencies between running the specs on your machi
729
729
 
730
730
  Please note - your code directory is mounted as a volume, so you can make code changes without needing to rebuild.
731
731
 
732
+ ### Releasing a new version
733
+
734
+ A new version is published to RubyGems.org every time a change to `version.rb` is pushed to the `main` branch.
735
+ In short, follow these steps:
736
+ 1. Update `version.rb`,
737
+ 2. run `bundle lock` to update `Gemfile.lock`,
738
+ 3. merge this change into `main`, and
739
+ 4. check the output of [the “Publish to RubyGems.org” workflow run](https://github.com/zendesk/racecar/actions/workflows/publish.yml) to confirm the release succeeded.
740
+
741
+ To create a pre-release from a non-main branch:
742
+ 1. change the version in `version.rb` to something like `2.13.0.pre.1` or `3.0.0.beta.2`,
743
+ 2. push this change to your branch,
744
+ 3. go to [Actions → “Publish to RubyGems.org” on GitHub](https://github.com/zendesk/racecar/actions/workflows/publish.yml),
745
+ 4. click the “Run workflow” button,
746
+ 5. pick your branch from the dropdown and confirm with the green “Run workflow” button.
747
+
732
748
  ## Contributing
733
749
 
734
750
  Bug reports and pull requests are welcome on [GitHub](https://github.com/zendesk/racecar). Feel free to [join our Slack team](https://ruby-kafka-slack.herokuapp.com/) and ask how best to contribute!
data/Rakefile CHANGED
@@ -1,11 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "bundler/setup"
3
4
  require "bundler/gem_tasks"
4
5
  require "rspec/core/rake_task"
5
6
 
6
- # Pushing to rubygems is handled by a github workflow
7
- ENV["gem_push"] = "false"
8
-
9
7
  RSpec::Core::RakeTask.new(:spec)
10
8
 
11
9
  task :default => :spec
@@ -0,0 +1,142 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'racecar/pause'
4
+ require 'concurrent-ruby'
5
+
6
+ module Racecar
7
+ class AsyncPartitionProcessor
8
+ attr_reader :thread
9
+
10
+ THREAD_KEY_IDENTIFIER = 'racecar_topic_partition_identifier'.freeze
11
+
12
+ def self.thread_key(topic, partition)
13
+ "#{topic}/#{partition}"
14
+ end
15
+
16
+ def initialize(topic:, partition:, logger:, config:, consumer:, consumer_class:, instrumenter:, rdkafka_consumer:)
17
+ @topic = topic
18
+ @partition = partition
19
+ @logger = logger
20
+ @config = config
21
+ @consumer = consumer
22
+ @consumer_class = consumer_class
23
+ @instrumenter = instrumenter
24
+ @rdkafka_consumer = rdkafka_consumer
25
+ @backpressure_paused = Concurrent::AtomicBoolean.new
26
+ @tpl = build_tpl(topic, partition)
27
+ setup_async_processing
28
+ end
29
+
30
+ def process(message)
31
+ push(message)
32
+ end
33
+
34
+ def process_batch(messages)
35
+ push(messages)
36
+ end
37
+
38
+ def rebalance!
39
+ processor.rebalance!
40
+ @queue << nil
41
+ end
42
+
43
+ def shut_down!
44
+ processor.shut_down!
45
+ @queue << nil
46
+ end
47
+
48
+ def rebalancing_or_shutting_down?
49
+ processor.rebalancing_or_shutting_down?
50
+ end
51
+
52
+ def resume_paused_partition
53
+ processor.resume_paused_partition
54
+ end
55
+
56
+ private
57
+
58
+ attr_reader :backpressure_paused, :instrumenter, :consumer_class, :consumer, :queue, :config, :processor, :logger
59
+
60
+ def setup_async_processing
61
+ @processor = PartitionProcessor.new(
62
+ config: config,
63
+ logger: logger,
64
+ instrumenter: instrumenter,
65
+ consumer_class_instance: consumer_class.new,
66
+ consumer: consumer,
67
+ topic: @topic,
68
+ partition: @partition,
69
+ pause: Pause.new_from_config(config),
70
+ rdkafka_consumer: @rdkafka_consumer,
71
+ )
72
+ @queue = Queue.new
73
+ @thread = nil
74
+
75
+ use_process_batch = consumer_class.method_defined?(:process_batch)
76
+
77
+ if use_process_batch
78
+ spawn_thread do |msgs|
79
+ processor.process_batch(msgs)
80
+ end
81
+ else
82
+ spawn_thread do |msgs|
83
+ msgs.each do |msg|
84
+ processor.process(msg)
85
+ end
86
+ end
87
+ end
88
+ end
89
+
90
+ def spawn_thread(&block)
91
+ @thread = Thread.new do
92
+ Thread.current.name = "Racecar thread for #{thread_key}"
93
+ Thread.current[AsyncPartitionProcessor::THREAD_KEY_IDENTIFIER] = thread_key
94
+ main_processing_loop(block)
95
+ end
96
+ end
97
+
98
+ def push(messages)
99
+ @queue << Array(messages)
100
+ maybe_apply_backpressure
101
+ end
102
+
103
+ def maybe_apply_backpressure
104
+ if @backpressure_paused.false? && @queue.size >= config.multithreaded_processing_max_queue_size
105
+ @backpressure_paused.make_true
106
+ @rdkafka_consumer.pause(@tpl)
107
+ logger.debug "Paused partition #{@topic}/#{@partition}: queue reached capacity (#{@queue.size}/#{config.multithreaded_processing_max_queue_size})"
108
+ end
109
+ end
110
+
111
+ def maybe_resume_the_partition
112
+ if @backpressure_paused.true? && @queue.size < config.multithreaded_processing_resume_threshold * config.multithreaded_processing_max_queue_size
113
+ @backpressure_paused.make_false
114
+ @rdkafka_consumer.resume(@tpl)
115
+ end
116
+ end
117
+
118
+ def build_tpl(topic, partition)
119
+ Rdkafka::Consumer::TopicPartitionList.new.tap do |tpl|
120
+ tpl.add_topic_and_partitions_with_offsets(topic, partition => -1001)
121
+ end
122
+ end
123
+
124
+ def thread_key
125
+ self.class.thread_key(@topic, @partition)
126
+ end
127
+
128
+ def main_processing_loop(block)
129
+ loop do
130
+ msgs = @queue.pop
131
+ break if msgs.nil?
132
+
133
+ maybe_resume_the_partition
134
+ block.call(msgs)
135
+ rescue => e
136
+ logger.error "Error in processing thread for #{thread_key}: #{e.class} - #{e.full_message}. backtrace: #{e.backtrace&.first(10)&.join("\n")}"
137
+ end
138
+ ensure
139
+ @processor.teardown
140
+ end
141
+ end
142
+ end
data/lib/racecar/cli.rb CHANGED
@@ -65,8 +65,7 @@ module Racecar
65
65
  config.install_liveness_probe
66
66
  end
67
67
 
68
- processor = consumer_class.new
69
- @runner = Racecar.runner(processor)
68
+ @runner = Racecar.runner(consumer_class)
70
69
  @runner.run
71
70
  nil
72
71
  end
@@ -194,6 +194,18 @@ module Racecar
194
194
  desc "Strategy for switching topics when there are multiple subscriptions. `exhaust-topic` will only switch when the consumer poll returns no messages. `round-robin` will switch after each poll regardless.\nWarning: `round-robin` will be the default in Racecar 3.x"
195
195
  string :multi_subscription_strategy, allowed_values: %w(round-robin exhaust-topic), default: "exhaust-topic"
196
196
 
197
+ desc "Whether multithreaded processing is enabled"
198
+ boolean :multithreaded_processing_enabled, default: false
199
+
200
+ desc "Max size of the queue of messages waiting to be processed when multithreaded processing is enabled"
201
+ integer :multithreaded_processing_max_queue_size, default: 1000
202
+
203
+ desc "Timeout in seconds for the main thread to wait for a processing thread to finish when shutting down the consumer with multithreaded processing enabled"
204
+ integer :multithreaded_processing_shutdown_timeout, default: 300
205
+
206
+ desc "Multi threaded queue resume threshold as a percentage of `multithreaded_processing_max_queue_size`. Defaults to 0.5, meaning that the consumer will attempt to resume a paused partition when the queue size drops below 50% of the max queue size."
207
+ float :multithreaded_processing_resume_threshold, default: 0.5
208
+
197
209
  # The error handler must be set directly on the object.
198
210
  attr_reader :error_handler
199
211
 
@@ -1,21 +1,24 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "racecar/delivery_callback"
4
+
3
5
  module Racecar
4
6
  class ConsumerSet
5
7
  MAX_POLL_TRIES = 10
6
8
 
7
- def initialize(config, logger, instrumenter = NullInstrumenter)
9
+ def initialize(config, logger, partition_processors, instrumenter = NullInstrumenter)
8
10
  @config, @logger = config, logger
9
11
  @instrumenter = instrumenter
12
+ @partition_processors = partition_processors
10
13
  raise ArgumentError, "Subscriptions must not be empty when subscribing" if @config.subscriptions.empty?
11
14
 
12
15
  @consumers = []
13
16
  @consumer_id_iterator = (0...@config.subscriptions.size).cycle
17
+ @producer_mutex = Mutex.new
14
18
 
15
19
  @previous_retries = 0
16
20
 
17
21
  @last_poll_read_nil_message = false
18
- @paused_tpls = Hash.new { |h, k| h[k] = {} }
19
22
  end
20
23
 
21
24
  def poll(max_wait_time_ms = @config.max_wait_time_ms)
@@ -48,8 +51,9 @@ module Racecar
48
51
  messages
49
52
  end
50
53
 
51
- def store_offset(message)
52
- current.store_offset(message)
54
+ def store_offset(message, raw_consumer = nil)
55
+ consumer = raw_consumer || current
56
+ consumer.store_offset(message)
53
57
  rescue Rdkafka::RdkafkaError => e
54
58
  if e.code == :state # -172
55
59
  @logger.warn "Attempted to store_offset, but we're not subscribed to it: #{ErroneousStateError.new(e)}"
@@ -66,13 +70,28 @@ module Racecar
66
70
 
67
71
  def close
68
72
  each_subscribed(&:close)
69
- @paused_tpls.clear
73
+ reset_producer!
74
+ end
75
+
76
+ def producer
77
+ @producer_mutex.synchronize do
78
+ @producer ||= Rdkafka::Config.new(producer_config).producer.tap do |p|
79
+ p.delivery_callback = Racecar::DeliveryCallback.new(instrumenter: @instrumenter)
80
+ end
81
+ end
82
+ end
83
+
84
+ def reset_producer!
85
+ @producer_mutex.synchronize do
86
+ @producer&.close
87
+ @producer = nil
88
+ end
70
89
  end
71
90
 
72
91
  def current
73
92
  @consumers[@consumer_id_iterator.peek] ||= begin
74
93
  consumer_config = Rdkafka::Config.new(rdkafka_config(current_subscription))
75
- listener = RebalanceListener.new(@config.consumer_class, @instrumenter)
94
+ listener = RebalanceListener.new(@config, @instrumenter, @partition_processors)
76
95
  consumer_config.consumer_rebalance_listener = listener
77
96
  consumer = consumer_config.consumer
78
97
  listener.rdkafka_consumer = consumer
@@ -86,44 +105,38 @@ module Racecar
86
105
 
87
106
  def each_subscribed
88
107
  if block_given?
89
- @consumers.each { |c| yield c }
108
+ @consumers.compact.each { |c| yield c }
90
109
  else
91
- @consumers.each
110
+ @consumers.compact.each
92
111
  end
93
112
  end
94
113
 
95
- def pause(topic, partition, offset)
114
+ def pause(topic, partition, offset = nil)
96
115
  consumer, filtered_tpl = find_consumer_by(topic, partition)
97
- if !consumer
116
+ unless consumer
98
117
  @logger.info "Attempted to pause #{topic}/#{partition}, but we're not subscribed to it"
99
118
  return
100
119
  end
101
120
 
102
121
  consumer.pause(filtered_tpl)
103
- fake_msg = OpenStruct.new(topic: topic, partition: partition, offset: offset)
104
- consumer.seek(fake_msg)
105
-
106
- @paused_tpls[topic][partition] = [consumer, filtered_tpl]
122
+ if offset
123
+ fake_msg = OpenStruct.new(topic: topic, partition: partition, offset: offset)
124
+ consumer.seek(fake_msg)
125
+ end
107
126
  end
108
127
 
109
128
  def resume(topic, partition)
110
129
  consumer, filtered_tpl = find_consumer_by(topic, partition)
111
130
 
112
- if !consumer && @paused_tpls[topic][partition]
113
- consumer, filtered_tpl = @paused_tpls[topic][partition]
114
- end
115
-
116
- if !consumer
131
+ unless consumer
117
132
  @logger.info "Attempted to resume #{topic}/#{partition}, but we're not subscribed to it"
118
133
  return
119
134
  end
120
135
 
121
136
  consumer.resume(filtered_tpl)
122
- @paused_tpls[topic].delete(partition)
123
- @paused_tpls.delete(topic) if @paused_tpls[topic].empty?
124
137
  end
125
138
 
126
- alias :each :each_subscribed
139
+ alias :each :each_subscribed
127
140
 
128
141
  # Subscribe to all topics eagerly, even if there's still messages elsewhere. Usually
129
142
  # that's not needed and Kafka might rebalance if topics are not polled frequently
@@ -269,5 +282,18 @@ module Racecar
269
282
  r = limit_ms - ((Time.now - started_at_time)*1000).round
270
283
  r <= 0 ? 0 : r
271
284
  end
285
+
286
+ def producer_config
287
+ cfg = {
288
+ "bootstrap.servers" => @config.brokers.join(","),
289
+ "client.id" => @config.client_id,
290
+ "statistics.interval.ms" => @config.statistics_interval_ms,
291
+ "message.timeout.ms" => @config.message_timeout * 1000,
292
+ "partitioner" => @config.partitioner.to_s,
293
+ }
294
+ cfg["compression.codec"] = @config.producer_compression_codec.to_s unless @config.producer_compression_codec.nil?
295
+ cfg.merge!(@config.rdkafka_producer)
296
+ cfg
297
+ end
272
298
  end
273
299
  end
@@ -0,0 +1,217 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rdkafka"
4
+ require "racecar/pause"
5
+ require "racecar/delivery_callback"
6
+
7
+ module Racecar
8
+ class PartitionProcessor
9
+ attr_reader :consumer_class_instance, :config, :logger, :instrumenter, :consumer, :topic, :partition, :pause
10
+ attr_accessor :rebalancing, :shutting_down
11
+
12
+ def initialize(config:, logger:, instrumenter:, consumer_class_instance:, consumer:, topic:, partition:, pause:, rdkafka_consumer: nil)
13
+ @config = config
14
+ @logger = logger
15
+ @instrumenter = instrumenter
16
+ @consumer_class_instance = consumer_class_instance
17
+ @pause = pause
18
+ @topic = topic
19
+ @partition = partition
20
+ @consumer = consumer
21
+ @rdkafka_consumer = rdkafka_consumer
22
+
23
+ if config.multithreaded_processing_enabled
24
+ consumer_class_instance.configure(
25
+ producer: consumer.producer,
26
+ consumer: @consumer,
27
+ instrumenter: @instrumenter,
28
+ config: @config,
29
+ )
30
+ end
31
+
32
+ @sleep_mutex = Mutex.new
33
+ @sleep_cv = ConditionVariable.new
34
+ end
35
+
36
+ def process(message)
37
+ payload = {
38
+ consumer_class: consumer_class_instance.class.to_s,
39
+ topic: message.topic,
40
+ partition: message.partition,
41
+ offset: message.offset,
42
+ create_time: message.timestamp,
43
+ key: message.key,
44
+ value: message.payload,
45
+ headers: message.headers,
46
+ }
47
+ @instrumenter.instrument("start_process_message", payload)
48
+
49
+ with_error_handling(message, payload) do |pause|
50
+ @instrumenter.instrument("process_message", payload) do
51
+ if @config.multithreaded_processing_enabled && consumer_class_instance.instance_variable_get(:@producer)&.closed?
52
+ reconfigure_consumer_class_instance!
53
+ end
54
+ consumer_class_instance.process(Racecar::Message.new(message, retries_count: pause.pauses_count))
55
+ consumer_class_instance.deliver!
56
+ consumer.store_offset(message, @rdkafka_consumer) unless rebalancing
57
+ end
58
+ end
59
+ end
60
+
61
+ def process_batch(messages)
62
+ first, last = messages.first, messages.last
63
+ payload = {
64
+ consumer_class: consumer_class_instance.class.to_s,
65
+ topic: first.topic,
66
+ partition: first.partition,
67
+ first_offset: first.offset,
68
+ last_offset: last.offset,
69
+ last_create_time: last.timestamp,
70
+ message_count: messages.size,
71
+ }
72
+ @instrumenter.instrument("start_process_batch", payload)
73
+
74
+ with_error_handling(messages, payload) do |pause|
75
+ @instrumenter.instrument("process_batch", payload) do
76
+ racecar_messages = messages.map do |message|
77
+ Racecar::Message.new(message, retries_count: pause.pauses_count)
78
+ end
79
+ if @config.multithreaded_processing_enabled && consumer_class_instance.instance_variable_get(:@producer)&.closed?
80
+ reconfigure_consumer_class_instance!
81
+ end
82
+ consumer_class_instance.process_batch(racecar_messages)
83
+ consumer_class_instance.deliver!
84
+ consumer.store_offset(messages.last, @rdkafka_consumer) unless rebalancing
85
+ end
86
+ end
87
+ end
88
+
89
+ def teardown
90
+ consumer_class_instance.deliver! unless rebalancing
91
+ ensure
92
+ consumer_class_instance.teardown
93
+ end
94
+
95
+ def resume_paused_partition
96
+ return if config.pause_timeout == 0 || !pause.paused?
97
+
98
+ @instrumenter.instrument("pause_status", {
99
+ topic: topic,
100
+ partition: partition,
101
+ duration: pause.pause_duration,
102
+ consumer_class: consumer_class_instance.class.to_s,
103
+ })
104
+
105
+ if pause.paused? && pause.expired?
106
+ logger.info "Automatically resuming partition #{topic}/#{partition}, pause timeout expired"
107
+ consumer.resume(topic, partition)
108
+ pause.resume!
109
+ end
110
+ end
111
+
112
+ def rebalance!
113
+ @rebalancing = true
114
+ @sleep_mutex.synchronize { @sleep_cv.signal }
115
+ end
116
+
117
+ def shut_down!
118
+ @shutting_down = true
119
+ @sleep_mutex.synchronize { @sleep_cv.signal }
120
+ resume_paused_partition
121
+ end
122
+
123
+ def rebalancing_or_shutting_down?
124
+ rebalancing || shutting_down
125
+ end
126
+
127
+ private
128
+
129
+ def with_error_handling(messages, payload)
130
+ if config.multithreaded_processing_enabled
131
+ with_multi_threaded_error_handling(messages, payload) { |pause| yield(pause) }
132
+ else
133
+ with_single_threaded_error_handling(messages, payload) { |pause| yield(pause) }
134
+ end
135
+ end
136
+
137
+ def with_multi_threaded_error_handling(messages, payload)
138
+ loop do
139
+ begin
140
+ yield(pause)
141
+ pause.reset!
142
+ break
143
+ rescue => e
144
+ if rebalancing
145
+ Thread.exit
146
+ elsif !shutting_down
147
+ handle_processing_error(e, payload, pause: pause)
148
+ pause.pause!
149
+ unless config.pause_timeout <= 0
150
+ @sleep_mutex.synchronize do
151
+ next if rebalancing || shutting_down
152
+ @sleep_cv.wait(@sleep_mutex, pause.backoff_interval)
153
+ end
154
+ end
155
+ Thread.exit if rebalancing
156
+ break if shutting_down || config.pause_timeout <= 0
157
+ else
158
+ handle_processing_error(e, payload, pause: pause)
159
+ break
160
+ end
161
+ end
162
+ end
163
+ end
164
+
165
+ def with_single_threaded_error_handling(messages, payload)
166
+ offsets = messages.is_a?(Array) ? messages.first.offset..messages.last.offset : messages.offset..messages.offset
167
+ with_pause(offsets) do
168
+ yield(pause)
169
+ rescue => e
170
+ handle_processing_error(e, payload, pause: pause)
171
+ raise e
172
+ end
173
+ end
174
+
175
+ def with_pause(offsets)
176
+ return yield if config.pause_timeout == 0
177
+
178
+ begin
179
+ yield
180
+ pause.reset!
181
+ rescue => e
182
+ desc = "#{topic}/#{partition}"
183
+ logger.error "Failed to process #{desc} at #{offsets}: #{e}"
184
+ logger.warn "Pausing partition #{desc} for #{pause.backoff_interval} seconds"
185
+ consumer.pause(topic, partition, offsets.first)
186
+ pause.pause!
187
+ end
188
+ end
189
+
190
+ def handle_processing_error(error, payload, pause:)
191
+ if error.is_a?(Racecar::MessageDeliveryError) && error.code == :msg_timed_out
192
+ logger.error error.to_s
193
+ logger.error "Racecar will reset the producer to force a new broker connection."
194
+ reset_producer!
195
+ payload[:unrecoverable_delivery_error] = true
196
+ else
197
+ payload[:unrecoverable_delivery_error] = false
198
+ end
199
+ payload[:retries_count] = pause.pauses_count
200
+ config.error_handler.call(error, payload)
201
+ end
202
+
203
+ def reset_producer!
204
+ consumer.reset_producer!
205
+ reconfigure_consumer_class_instance!
206
+ end
207
+
208
+ def reconfigure_consumer_class_instance!
209
+ consumer_class_instance.configure(
210
+ producer: consumer.producer,
211
+ consumer: consumer,
212
+ instrumenter: @instrumenter,
213
+ config: @config,
214
+ )
215
+ end
216
+ end
217
+ end
data/lib/racecar/pause.rb CHANGED
@@ -4,6 +4,22 @@ module Racecar
4
4
  class Pause
5
5
  attr_reader :pauses_count
6
6
 
7
+ def self.new_from_config(config)
8
+ timeout = if config.pause_timeout == -1 || config.pause_timeout == 0
9
+ nil
10
+ elsif config.pause_timeout > 0
11
+ config.pause_timeout
12
+ else
13
+ raise ArgumentError, "Invalid value for pause_timeout: must be integer greater or equal -1"
14
+ end
15
+
16
+ new(
17
+ timeout: timeout,
18
+ max_timeout: config.max_pause_timeout,
19
+ exponential_backoff: config.pause_with_exponential_backoff
20
+ )
21
+ end
22
+
7
23
  def initialize(timeout: nil, max_timeout: nil, exponential_backoff: false)
8
24
  @started_at = nil
9
25
  @pauses_count = 0
@@ -1,8 +1,10 @@
1
1
  module Racecar
2
2
  class RebalanceListener
3
- def initialize(consumer_class, instrumenter)
4
- @consumer_class = consumer_class
3
+ def initialize(config, instrumenter, partition_processors)
4
+ @consumer_class = config.consumer_class
5
+ @config = config
5
6
  @instrumenter = instrumenter
7
+ @partition_processors = partition_processors
6
8
  @rdkafka_consumer = nil
7
9
  end
8
10
 
@@ -24,6 +26,14 @@ module Racecar
24
26
 
25
27
  instrument("partitions_revoked", partitions: event.partition_numbers) do
26
28
  consumer_class.on_partitions_revoked(event)
29
+ rdkafka_topic_partition_list.to_h.each do |topic, partitions_metadata|
30
+ partitions_metadata.flatten.map(&:partition).each do |partition|
31
+ key = Runner.topic_partition_key(topic, partition)
32
+ processor = @partition_processors[key]
33
+ processor&.rebalance!
34
+ @partition_processors.delete(key)
35
+ end
36
+ end
27
37
  end
28
38
  end
29
39
 
@@ -6,156 +6,125 @@ require "racecar/message"
6
6
  require "racecar/message_delivery_error"
7
7
  require "racecar/erroneous_state_error"
8
8
  require "racecar/delivery_callback"
9
+ require "racecar/partition_processor"
10
+ require "racecar/async_partition_processor"
9
11
 
10
12
  module Racecar
11
13
  class Runner
12
- attr_reader :processor, :config, :logger
14
+ attr_reader :consumer_class, :config, :logger, :partition_processors
13
15
 
14
- def initialize(processor, config:, logger:, instrumenter: NullInstrumenter)
15
- @processor, @config, @logger = processor, config, logger
16
+ # Kept for backward compatibility — external code calls `processor`.
17
+ def processor
18
+ @consumer_class_instance
19
+ end
20
+
21
+ def initialize(consumer_class, config:, logger:, instrumenter: NullInstrumenter)
22
+ @consumer_class, @config, @logger = consumer_class, config, logger
16
23
  @instrumenter = instrumenter
17
24
  @stop_requested = false
18
- Rdkafka::Config.logger = logger
19
-
20
- if processor.respond_to?(:statistics_callback)
21
- Rdkafka::Config.statistics_callback = processor.method(:statistics_callback).to_proc
25
+ @partition_processors = Concurrent::Hash.new
26
+ @consumer_class_instance = consumer_class.new
27
+ if @consumer_class_instance.respond_to?(:statistics_callback) && Rdkafka::Config.statistics_callback.nil?
28
+ Rdkafka::Config.statistics_callback = @consumer_class_instance.method(:statistics_callback).to_proc
22
29
  end
23
-
24
- setup_pauses
30
+ Rdkafka::Config.logger = logger
25
31
  end
26
32
 
27
- def setup_pauses
28
- timeout = if config.pause_timeout == -1
29
- nil
30
- elsif config.pause_timeout == 0
31
- # no op, handled elsewhere
32
- elsif config.pause_timeout > 0
33
- config.pause_timeout
34
- else
35
- raise ArgumentError, "Invalid value for pause_timeout: must be integer greater or equal -1"
36
- end
37
-
38
- @pauses = Hash.new {|h, k|
39
- h[k] = Hash.new {|h2, k2|
40
- h2[k2] = ::Racecar::Pause.new(
41
- timeout: timeout,
42
- max_timeout: config.max_pause_timeout,
43
- exponential_backoff: config.pause_with_exponential_backoff
44
- )
45
- }
46
- }
33
+ def self.topic_partition_key(topic, partition)
34
+ "#{topic}/#{partition}"
47
35
  end
48
36
 
49
37
  def run
50
38
  install_signal_handlers
51
39
  @stop_requested = false
52
40
 
53
- # Configure the consumer with a producer so it can produce messages and
54
- # with a consumer so that it can support advanced use-cases.
55
- processor.configure(
56
- producer: producer,
57
- consumer: consumer,
58
- instrumenter: @instrumenter,
59
- config: @config,
60
- )
41
+ unless config.multithreaded_processing_enabled
42
+ @consumer_class_instance.configure(
43
+ producer: consumer.producer,
44
+ consumer: consumer,
45
+ instrumenter: @instrumenter,
46
+ config: config,
47
+ )
48
+ end
61
49
 
62
- instrumentation_payload = {
63
- consumer_class: processor.class.to_s,
50
+ loop_payload = {
51
+ consumer_class: consumer_class.to_s,
64
52
  consumer_set: consumer
65
53
  }
66
-
67
54
  # Main loop
68
- loop do
69
- break if @stop_requested
70
- resume_paused_partitions
71
-
72
- @instrumenter.instrument("start_main_loop", instrumentation_payload)
73
- @instrumenter.instrument("main_loop", instrumentation_payload) do
74
- case process_method
75
- when :batch then
76
- msg_per_part = consumer.batch_poll(config.max_wait_time_ms).group_by(&:partition)
77
- msg_per_part.each_value do |messages|
78
- process_batch(messages)
55
+ begin
56
+ loop do
57
+ break if @stop_requested
58
+
59
+ @instrumenter.instrument("start_main_loop", loop_payload)
60
+ @instrumenter.instrument("main_loop", loop_payload) do
61
+ resume_all_paused_partitions unless config.multithreaded_processing_enabled
62
+
63
+ case process_method
64
+ when :batch then
65
+ msg_per_part = consumer.batch_poll(config.max_wait_time_ms).group_by(&:partition)
66
+ msg_per_part.each_value do |messages_per_partition|
67
+ processor = assign_and_get_processor(messages_per_partition)
68
+ processor&.process_batch(messages_per_partition) unless processor&.rebalancing_or_shutting_down?
69
+ end
70
+ when :single then
71
+ message = consumer.poll(config.max_wait_time_ms)
72
+ if message
73
+ processor = assign_and_get_processor(message)
74
+ processor&.process(message) unless processor&.rebalancing_or_shutting_down?
75
+ end
79
76
  end
80
- when :single then
81
- message = consumer.poll(config.max_wait_time_ms)
82
- process(message) if message
83
77
  end
84
78
  end
79
+ ensure
80
+ logger.info "Gracefully shutting down"
81
+ shutdown_processors_and_wait
82
+ consumer.commit
85
83
  end
86
-
87
- logger.info "Gracefully shutting down"
84
+ ensure
88
85
  begin
89
- processor.deliver!
90
- processor.teardown
91
- consumer.commit
92
- ensure
93
86
  @instrumenter.instrument('leave_group') do
94
87
  consumer.close
95
88
  end
89
+ ensure
90
+ Racecar::Datadog.close if config.datadog_enabled
91
+ @instrumenter.instrument("shut_down", loop_payload || {})
96
92
  end
97
- ensure
98
- producer.close
99
- Racecar::Datadog.close if config.datadog_enabled
100
- @instrumenter.instrument("shut_down", instrumentation_payload || {})
101
93
  end
102
94
 
103
95
  def stop
104
96
  @stop_requested = true
105
97
  end
106
98
 
107
- private
99
+ def consumer
100
+ @consumer ||= begin
101
+ ConsumerSet.new(config, logger, @partition_processors, @instrumenter)
102
+ end
103
+ end
108
104
 
109
- attr_reader :pauses
105
+ private
110
106
 
111
107
  def process_method
112
108
  @process_method ||= begin
113
109
  case
114
- when processor.respond_to?(:process_batch)
115
- if processor.method(:process_batch).arity != 1
110
+ when consumer_class.method_defined?(:process_batch)
111
+ if consumer_class.instance_method(:process_batch).arity != 1
116
112
  raise Racecar::Error, "Invalid method signature for `process_batch`. The method must take exactly 1 argument."
117
113
  end
118
114
 
119
115
  :batch
120
- when processor.respond_to?(:process)
121
- if processor.method(:process).arity != 1
116
+ when consumer_class.method_defined?(:process)
117
+ if consumer_class.instance_method(:process).arity != 1
122
118
  raise Racecar::Error, "Invalid method signature for `process`. The method must take exactly 1 argument."
123
119
  end
124
120
 
125
121
  :single
126
122
  else
127
- raise NotImplementedError, "Consumer class `#{processor.class}` must implement a `process` or `process_batch` method"
123
+ raise NotImplementedError, "Consumer class `#{consumer_class}` must implement a `process` or `process_batch` method"
128
124
  end
129
125
  end
130
126
  end
131
127
 
132
- def consumer
133
- @consumer ||= begin
134
- ConsumerSet.new(config, logger, @instrumenter)
135
- end
136
- end
137
-
138
- def producer
139
- @producer ||= Rdkafka::Config.new(producer_config).producer.tap do |producer|
140
- producer.delivery_callback = Racecar::DeliveryCallback.new(instrumenter: @instrumenter)
141
- end
142
- end
143
-
144
- def producer_config
145
- # https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
146
- producer_config = {
147
- "bootstrap.servers" => config.brokers.join(","),
148
- "client.id" => config.client_id,
149
- "statistics.interval.ms" => config.statistics_interval_ms,
150
- "message.timeout.ms" => config.message_timeout * 1000,
151
- "partitioner" => config.partitioner.to_s,
152
- }
153
-
154
- producer_config["compression.codec"] = config.producer_compression_codec.to_s unless config.producer_compression_codec.nil?
155
- producer_config.merge!(config.rdkafka_producer)
156
- producer_config
157
- end
158
-
159
128
  def install_signal_handlers
160
129
  # Stop the consumer on SIGINT, SIGQUIT or SIGTERM.
161
130
  trap("QUIT") { stop }
@@ -166,128 +135,65 @@ module Racecar
166
135
  trap("USR1") { $stderr.puts config.inspect }
167
136
  end
168
137
 
169
- def process(message)
170
- instrumentation_payload = {
171
- consumer_class: processor.class.to_s,
172
- topic: message.topic,
173
- partition: message.partition,
174
- offset: message.offset,
175
- create_time: message.timestamp,
176
- key: message.key,
177
- value: message.payload,
178
- headers: message.headers
179
- }
180
-
181
- @instrumenter.instrument("start_process_message", instrumentation_payload)
182
- with_pause(message.topic, message.partition, message.offset..message.offset) do |pause|
183
- begin
184
- @instrumenter.instrument("process_message", instrumentation_payload) do
185
- processor.process(Racecar::Message.new(message, retries_count: pause.pauses_count))
186
- processor.deliver!
187
- consumer.store_offset(message)
188
- end
189
- rescue => e
190
- instrumentation_payload[:unrecoverable_delivery_error] = reset_producer_on_unrecoverable_delivery_errors(e)
191
- instrumentation_payload[:retries_count] = pause.pauses_count
192
- config.error_handler.call(e, instrumentation_payload)
193
- raise e
194
- end
138
+ def assign_and_get_processor(messages)
139
+ topic = messages.is_a?(Array) ? messages.first.topic : messages.topic
140
+ partition = messages.is_a?(Array) ? messages.first.partition : messages.partition
141
+ key = Runner.topic_partition_key(topic, partition)
142
+ return partition_processors[key] if partition_processors[key]
143
+
144
+ processor = if config.multithreaded_processing_enabled
145
+ AsyncPartitionProcessor.new(
146
+ **common_processor_params,
147
+ consumer_class: consumer_class,
148
+ topic: topic,
149
+ partition: partition,
150
+ rdkafka_consumer: consumer.current,
151
+ )
152
+ else
153
+ PartitionProcessor.new(
154
+ **common_processor_params,
155
+ consumer_class_instance: @consumer_class_instance,
156
+ topic: topic,
157
+ partition: partition,
158
+ pause: Pause.new_from_config(config),
159
+ )
195
160
  end
161
+ partition_processors[key] = processor
196
162
  end
197
163
 
198
- def process_batch(messages)
199
- first, last = messages.first, messages.last
200
- instrumentation_payload = {
201
- consumer_class: processor.class.to_s,
202
- topic: first.topic,
203
- partition: first.partition,
204
- first_offset: first.offset,
205
- last_offset: last.offset,
206
- last_create_time: last.timestamp,
207
- message_count: messages.size
208
- }
209
-
210
- @instrumenter.instrument("start_process_batch", instrumentation_payload)
211
- with_pause(first.topic, first.partition, first.offset..last.offset) do |pause|
212
- begin
213
- @instrumenter.instrument("process_batch", instrumentation_payload) do
214
- racecar_messages = messages.map do |message|
215
- Racecar::Message.new(message, retries_count: pause.pauses_count)
164
+ def shutdown_processors_and_wait
165
+ if config.multithreaded_processing_enabled
166
+ processors_snapshot = partition_processors.values
167
+ processors_snapshot.each { |processor| processor.shut_down! if processor }
168
+ processors_snapshot.each do |processor|
169
+ if processor.respond_to?(:thread)
170
+ begin
171
+ processor.thread.join(config.multithreaded_processing_shutdown_timeout)
172
+ rescue => e
173
+ logger.error "Error while waiting for processor thread to finish: #{e}"
216
174
  end
217
- processor.process_batch(racecar_messages)
218
- processor.deliver!
219
- consumer.store_offset(messages.last)
220
175
  end
221
- rescue => e
222
- instrumentation_payload[:unrecoverable_delivery_error] = reset_producer_on_unrecoverable_delivery_errors(e)
223
- instrumentation_payload[:retries_count] = pause.pauses_count
224
- config.error_handler.call(e, instrumentation_payload)
225
- raise e
176
+ end
177
+ else
178
+ begin
179
+ @consumer_class_instance.deliver!
180
+ ensure
181
+ @consumer_class_instance.teardown
226
182
  end
227
183
  end
228
184
  end
229
185
 
230
- # librdkafka will continue to try to deliver already queued messages, even if ruby-rdkafka
231
- # raised before that. This method detects any unrecoverable errors and resets the producer
232
- # as a last ditch effort.
233
- # The function returns true if there were unrecoverable errors, or false otherwise.
234
- def reset_producer_on_unrecoverable_delivery_errors(error)
235
- return false unless error.is_a?(Racecar::MessageDeliveryError)
236
- return false unless error.code == :msg_timed_out # -192
237
-
238
- logger.error error.to_s
239
- logger.error "Racecar will reset the producer to force a new broker connection."
240
- @producer.close
241
- @producer = nil
242
- processor.configure(
243
- producer: producer,
244
- consumer: consumer,
245
- instrumenter: @instrumenter,
246
- config: @config,
247
- )
248
-
249
- true
186
+ def resume_all_paused_partitions
187
+ partition_processors.values.reject(&:rebalancing_or_shutting_down?).each(&:resume_paused_partition)
250
188
  end
251
189
 
252
- def with_pause(topic, partition, offsets)
253
- pause = pauses[topic][partition]
254
- return yield pause if config.pause_timeout == 0
255
-
256
- begin
257
- yield pause
258
- # We've successfully processed a batch from the partition, so we can clear the pause.
259
- pauses[topic][partition].reset!
260
- rescue => e
261
- desc = "#{topic}/#{partition}"
262
- logger.error "Failed to process #{desc} at #{offsets}: #{e}"
263
-
264
- logger.warn "Pausing partition #{desc} for #{pause.backoff_interval} seconds"
265
- consumer.pause(topic, partition, offsets.first)
266
- pause.pause!
267
- end
268
- end
269
-
270
- def resume_paused_partitions
271
- return if config.pause_timeout == 0
272
-
273
- pauses.each do |topic, partitions|
274
- partitions.each do |partition, pause|
275
- instrumentation_payload = {
276
- topic: topic,
277
- partition: partition,
278
- duration: pause.pause_duration,
279
- consumer_class: processor.class.to_s,
280
- }
281
- @instrumenter.instrument("pause_status", instrumentation_payload)
282
-
283
- if pause.paused? && pause.expired?
284
- logger.info "Automatically resuming partition #{topic}/#{partition}, pause timeout expired"
285
- consumer.resume(topic, partition)
286
- pause.resume!
287
- # TODO: # During re-balancing we might have lost the paused partition. Check if partition is still in group before seek. ?
288
- end
289
- end
290
- end
190
+ def common_processor_params
191
+ {
192
+ config: config,
193
+ logger: logger,
194
+ instrumenter: @instrumenter,
195
+ consumer: consumer,
196
+ }
291
197
  end
292
198
  end
293
199
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Racecar
4
- VERSION = "2.12.0"
4
+ VERSION = "3.0.0.alpha.2"
5
5
  end
data/lib/racecar.rb CHANGED
@@ -65,12 +65,12 @@ module Racecar
65
65
  config.instrumenter
66
66
  end
67
67
 
68
- def self.run(processor)
69
- runner(processor).run
68
+ def self.run(consumer_class)
69
+ runner(consumer_class).run
70
70
  end
71
71
 
72
- def self.runner(processor)
73
- runner = Runner.new(processor, config: config, logger: logger, instrumenter: config.instrumenter)
72
+ def self.runner(consumer_class)
73
+ runner = Runner.new(consumer_class, config: config, logger: logger, instrumenter: config.instrumenter)
74
74
 
75
75
  if config.parallel_workers && config.parallel_workers > 1
76
76
  ParallelRunner.new(runner: runner, config: config, logger: logger)
metadata CHANGED
@@ -1,15 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: racecar
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.12.0
4
+ version: 3.0.0.alpha.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Schierbeck
8
8
  - Benjamin Quorning
9
- autorequire:
10
9
  bindir: exe
11
10
  cert_chain: []
12
- date: 2025-02-27 00:00:00.000000000 Z
11
+ date: 1980-01-02 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: king_konf
@@ -149,7 +148,6 @@ dependencies:
149
148
  - - ">="
150
149
  - !ruby/object:Gem::Version
151
150
  version: '0'
152
- description:
153
151
  email:
154
152
  - dschierbeck@zendesk.com
155
153
  - bquorning@zendesk.com
@@ -159,6 +157,7 @@ executables:
159
157
  extensions: []
160
158
  extra_rdoc_files: []
161
159
  files:
160
+ - ".github/CODEOWNERS"
162
161
  - ".github/dependabot.yml"
163
162
  - ".github/workflows/ci.yml"
164
163
  - ".github/workflows/publish.yml"
@@ -187,6 +186,7 @@ files:
187
186
  - lib/generators/templates/consumer.rb.erb
188
187
  - lib/generators/templates/racecar.yml.erb
189
188
  - lib/racecar.rb
189
+ - lib/racecar/async_partition_processor.rb
190
190
  - lib/racecar/cli.rb
191
191
  - lib/racecar/config.rb
192
192
  - lib/racecar/consumer.rb
@@ -203,6 +203,7 @@ files:
203
203
  - lib/racecar/message_delivery_error.rb
204
204
  - lib/racecar/null_instrumenter.rb
205
205
  - lib/racecar/parallel_runner.rb
206
+ - lib/racecar/partition_processor.rb
206
207
  - lib/racecar/pause.rb
207
208
  - lib/racecar/producer.rb
208
209
  - lib/racecar/rails_config_file_loader.rb
@@ -214,7 +215,6 @@ homepage: https://github.com/zendesk/racecar
214
215
  licenses:
215
216
  - Apache License Version 2.0
216
217
  metadata: {}
217
- post_install_message:
218
218
  rdoc_options: []
219
219
  require_paths:
220
220
  - lib
@@ -229,8 +229,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
229
229
  - !ruby/object:Gem::Version
230
230
  version: '0'
231
231
  requirements: []
232
- rubygems_version: 3.5.22
233
- signing_key:
232
+ rubygems_version: 3.6.9
234
233
  specification_version: 4
235
234
  summary: A framework for running Kafka consumers
236
235
  test_files: []