ruby-kafka 0.7.10 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +179 -0
  3. data/.github/workflows/stale.yml +19 -0
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +40 -0
  6. data/README.md +167 -0
  7. data/lib/kafka/async_producer.rb +60 -42
  8. data/lib/kafka/client.rb +92 -6
  9. data/lib/kafka/cluster.rb +82 -24
  10. data/lib/kafka/connection.rb +3 -0
  11. data/lib/kafka/consumer.rb +61 -11
  12. data/lib/kafka/consumer_group/assignor.rb +63 -0
  13. data/lib/kafka/consumer_group.rb +29 -6
  14. data/lib/kafka/crc32_hash.rb +15 -0
  15. data/lib/kafka/datadog.rb +20 -13
  16. data/lib/kafka/digest.rb +22 -0
  17. data/lib/kafka/fetcher.rb +5 -2
  18. data/lib/kafka/interceptors.rb +33 -0
  19. data/lib/kafka/murmur2_hash.rb +17 -0
  20. data/lib/kafka/offset_manager.rb +12 -1
  21. data/lib/kafka/partitioner.rb +8 -3
  22. data/lib/kafka/producer.rb +13 -5
  23. data/lib/kafka/prometheus.rb +78 -79
  24. data/lib/kafka/protocol/add_offsets_to_txn_response.rb +2 -0
  25. data/lib/kafka/protocol/encoder.rb +1 -1
  26. data/lib/kafka/protocol/join_group_request.rb +8 -2
  27. data/lib/kafka/protocol/join_group_response.rb +9 -1
  28. data/lib/kafka/protocol/metadata_response.rb +1 -1
  29. data/lib/kafka/protocol/offset_fetch_request.rb +3 -1
  30. data/lib/kafka/protocol/record_batch.rb +2 -2
  31. data/lib/kafka/protocol/sasl_handshake_request.rb +1 -1
  32. data/lib/kafka/protocol/sync_group_response.rb +5 -2
  33. data/lib/kafka/protocol/txn_offset_commit_response.rb +34 -5
  34. data/lib/kafka/round_robin_assignment_strategy.rb +37 -39
  35. data/lib/kafka/sasl/awsmskiam.rb +133 -0
  36. data/lib/kafka/sasl_authenticator.rb +15 -2
  37. data/lib/kafka/ssl_context.rb +6 -5
  38. data/lib/kafka/tagged_logger.rb +1 -0
  39. data/lib/kafka/transaction_manager.rb +30 -10
  40. data/lib/kafka/version.rb +1 -1
  41. data/ruby-kafka.gemspec +5 -4
  42. metadata +39 -13
data/lib/kafka/consumer.rb CHANGED
@@ -1,6 +1,7 @@
  # frozen_string_literal: true

  require "kafka/consumer_group"
+ require "kafka/interceptors"
  require "kafka/offset_manager"
  require "kafka/fetcher"
  require "kafka/pause"
@@ -44,7 +45,8 @@ module Kafka
    #
    class Consumer

-     def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:, session_timeout:, heartbeat:)
+     def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:,
+                    session_timeout:, heartbeat:, refresh_topic_interval: 0, interceptors: [])
        @cluster = cluster
        @logger = TaggedLogger.new(logger)
        @instrumenter = instrumenter
@@ -53,6 +55,8 @@ module Kafka
        @session_timeout = session_timeout
        @fetcher = fetcher
        @heartbeat = heartbeat
+       @refresh_topic_interval = refresh_topic_interval
+       @interceptors = Interceptors.new(interceptors: interceptors, logger: logger)

        @pauses = Hash.new {|h, k|
          h[k] = Hash.new {|h2, k2|
@@ -73,6 +77,15 @@ module Kafka
        # when user commits message other than last in a batch, this would make ruby-kafka refetch
        # some already consumed messages
        @current_offsets = Hash.new { |h, k| h[k] = {} }
+
+       # Map storing subscribed topics with their configuration
+       @subscribed_topics = Hash.new
+
+       # Set storing topics that matched topics in @subscribed_topics
+       @matched_topics = Set.new
+
+       # Whether join_group must be executed again because new topics are added
+       @join_group_for_new_topics = false
      end

      # Subscribes the consumer to a topic.
@@ -97,13 +110,12 @@ module Kafka
      def subscribe(topic_or_regex, default_offset: nil, start_from_beginning: true, max_bytes_per_partition: 1048576)
        default_offset ||= start_from_beginning ? :earliest : :latest

-       if topic_or_regex.is_a?(Regexp)
-         cluster_topics.select { |topic| topic =~ topic_or_regex }.each do |topic|
-           subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
-         end
-       else
-         subscribe_to_topic(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
-       end
+       @subscribed_topics[topic_or_regex] = {
+         default_offset: default_offset,
+         start_from_beginning: start_from_beginning,
+         max_bytes_per_partition: max_bytes_per_partition
+       }
+       scan_for_subscribing

        nil
      end
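
Subscriptions are now recorded in `@subscribed_topics` and resolved by `scan_for_subscribing`, which is what makes periodic re-matching of regex subscriptions possible. A minimal usage sketch, assuming a reachable broker and the `refresh_topic_interval` consumer option this release adds (broker address, group id, and topic names are placeholders):

```ruby
require "kafka"

kafka = Kafka.new(["kafka1:9092"], client_id: "my-app")

# With a positive refresh interval, the subscription list is rescanned
# periodically, so topics created after startup that match the regex are
# picked up without restarting the consumer.
consumer = kafka.consumer(group_id: "greeters", refresh_topic_interval: 60)
consumer.subscribe(/greetings\..*/, start_from_beginning: false)

consumer.each_message do |message|
  puts "#{message.topic}/#{message.partition} @ #{message.offset}"
end
```
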
@@ -116,7 +128,6 @@ module Kafka
      def stop
        @running = false
        @fetcher.stop
-       @cluster.disconnect
      end

      # Pause processing of a specific topic partition.
@@ -212,6 +223,7 @@ module Kafka
        batches = fetch_batches

        batches.each do |batch|
+         batch = @interceptors.call(batch)
          batch.messages.each do |message|
            notification = {
              topic: message.topic,
@@ -303,11 +315,13 @@ module Kafka
        unless batch.empty?
          raw_messages = batch.messages
          batch.messages = raw_messages.reject(&:is_control_record)
+         batch = @interceptors.call(batch)

          notification = {
            topic: batch.topic,
            partition: batch.partition,
            last_offset: batch.last_offset,
+           last_create_time: batch.messages.last && batch.messages.last.create_time,
            offset_lag: batch.offset_lag,
            highwater_mark_offset: batch.highwater_mark_offset,
            message_count: batch.messages.count,
@@ -401,6 +415,7 @@ module Kafka
        while running?
          begin
            @instrumenter.instrument("loop.consumer") do
+             refresh_topic_list_if_enabled
              yield
            end
          rescue HeartbeatError
@@ -432,6 +447,7 @@ module Kafka
        # important that members explicitly tell Kafka when they're leaving.
        make_final_offsets_commit!
        @group.leave rescue nil
+       @cluster.disconnect
        @running = false
        @logger.pop_tags
      end
@@ -452,6 +468,8 @@ module Kafka
      end

      def join_group
+       @join_group_for_new_topics = false
+
        old_generation_id = @group.generation_id

        @group.join
@@ -513,11 +531,19 @@ module Kafka
        end
      end

+     def refresh_topic_list_if_enabled
+       return if @refresh_topic_interval <= 0
+       return if @refreshed_at && @refreshed_at + @refresh_topic_interval > Time.now
+
+       scan_for_subscribing
+       @refreshed_at = Time.now
+     end
+
      def fetch_batches
        # Return early if the consumer has been stopped.
        return [] if shutting_down?

-       join_group unless @group.member?
+       join_group if !@group.member? || @join_group_for_new_topics

        trigger_heartbeat

@@ -525,7 +551,7 @@ module Kafka

        if !@fetcher.data?
          @logger.debug "No batches to process"
-         sleep 2
+         sleep(@fetcher.max_wait_time || 2)
          []
        else
          tag, message = @fetcher.poll
@@ -571,10 +597,34 @@ module Kafka
        end
      end

+     def scan_for_subscribing
+       @subscribed_topics.each do |topic_or_regex, config|
+         default_offset = config.fetch(:default_offset)
+         start_from_beginning = config.fetch(:start_from_beginning)
+         max_bytes_per_partition = config.fetch(:max_bytes_per_partition)
+         if topic_or_regex.is_a?(Regexp)
+           subscribe_to_regex(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+         else
+           subscribe_to_topic(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+         end
+       end
+     end
+
+     def subscribe_to_regex(topic_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+       cluster_topics.select { |topic| topic =~ topic_regex }.each do |topic|
+         subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
+       end
+     end
+
      def subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
+       return if @matched_topics.include?(topic)
+       @matched_topics.add(topic)
+       @join_group_for_new_topics = true
+
        @group.subscribe(topic)
        @offset_manager.set_default_offset(topic, default_offset)
        @fetcher.subscribe(topic, max_bytes_per_partition: max_bytes_per_partition)
+       @cluster.mark_as_stale!
      end

      def cluster_topics
data/lib/kafka/consumer_group/assignor.rb ADDED
@@ -0,0 +1,63 @@
+ # frozen_string_literal: true
+
+ require "kafka/protocol/member_assignment"
+
+ module Kafka
+   class ConsumerGroup
+
+     # A consumer group partition assignor
+     class Assignor
+       Partition = Struct.new(:topic, :partition_id)
+
+       # @param cluster [Kafka::Cluster]
+       # @param strategy [Object] an object that implements #protocol_name,
+       #   #user_data, and #call.
+       def initialize(cluster:, strategy:)
+         @cluster = cluster
+         @strategy = strategy
+       end
+
+       def protocol_name
+         @strategy.respond_to?(:protocol_name) ? @strategy.protocol_name : @strategy.class.to_s
+       end
+
+       def user_data
+         @strategy.user_data if @strategy.respond_to?(:user_data)
+       end
+
+       # Assign the topic partitions to the group members.
+       #
+       # @param members [Hash<String, Kafka::Protocol::JoinGroupResponse::Metadata>] a hash
+       #   mapping member ids to metadata.
+       # @param topics [Array<String>] topics
+       # @return [Hash<String, Kafka::Protocol::MemberAssignment>] a hash mapping member
+       #   ids to assignments.
+       def assign(members:, topics:)
+         topic_partitions = topics.flat_map do |topic|
+           begin
+             partition_ids = @cluster.partitions_for(topic).map(&:partition_id)
+           rescue UnknownTopicOrPartition
+             raise UnknownTopicOrPartition, "unknown topic #{topic}"
+           end
+           partition_ids.map {|partition_id| Partition.new(topic, partition_id) }
+         end
+
+         group_assignment = {}
+
+         members.each_key do |member_id|
+           group_assignment[member_id] = Protocol::MemberAssignment.new
+         end
+         @strategy.call(cluster: @cluster, members: members, partitions: topic_partitions).each do |member_id, partitions|
+           Array(partitions).each do |partition|
+             group_assignment[member_id].assign(partition.topic, [partition.partition_id])
+           end
+         end
+
+         group_assignment
+       rescue Kafka::LeaderNotAvailable
+         sleep 1
+         retry
+       end
+     end
+   end
+ end
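
The Assignor delegates the actual partition distribution to a pluggable strategy: any object responding to `call(cluster:, members:, partitions:)` and returning a hash of member ids to partitions will do, with `protocol_name` and `user_data` optional. A hedged sketch of a custom strategy; it is wired in through the `assignment_strategy` parameter shown in consumer_group.rb below:

```ruby
# Distributes partitions round-robin over member ids sorted alphabetically.
class AlphabeticalAssignmentStrategy
  def protocol_name
    "alphabetical"
  end

  def user_data
    nil
  end

  # members:    { member_id => JoinGroupResponse::Metadata }
  # partitions: objects responding to #topic and #partition_id
  def call(cluster:, members:, partitions:)
    assignments = Hash.new { |h, k| h[k] = [] }
    member_ids = members.keys.sort

    partitions.each_with_index do |partition, index|
      assignments[member_ids[index % member_ids.size]] << partition
    end

    assignments
  end
end

# Assuming the consumer-level option added in this release:
# consumer = kafka.consumer(group_id: "my-group",
#                           assignment_strategy: AlphabeticalAssignmentStrategy.new)
```
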
data/lib/kafka/consumer_group.rb CHANGED
@@ -1,24 +1,29 @@
  # frozen_string_literal: true

  require "set"
+ require "kafka/consumer_group/assignor"
  require "kafka/round_robin_assignment_strategy"

  module Kafka
    class ConsumerGroup
      attr_reader :assigned_partitions, :generation_id, :group_id

-     def initialize(cluster:, logger:, group_id:, session_timeout:, retention_time:, instrumenter:)
+     def initialize(cluster:, logger:, group_id:, session_timeout:, rebalance_timeout:, retention_time:, instrumenter:, assignment_strategy:)
        @cluster = cluster
        @logger = TaggedLogger.new(logger)
        @group_id = group_id
        @session_timeout = session_timeout
+       @rebalance_timeout = rebalance_timeout
        @instrumenter = instrumenter
        @member_id = ""
        @generation_id = nil
        @members = {}
        @topics = Set.new
        @assigned_partitions = {}
-       @assignment_strategy = RoundRobinAssignmentStrategy.new(cluster: @cluster)
+       @assignor = Assignor.new(
+         cluster: cluster,
+         strategy: assignment_strategy || RoundRobinAssignmentStrategy.new
+       )
        @retention_time = retention_time
      end

@@ -112,9 +117,12 @@ module Kafka

          Protocol.handle_error(response.error_code)
        end
-     rescue ConnectionError, UnknownMemberId, RebalanceInProgress, IllegalGeneration => e
+     rescue ConnectionError, UnknownMemberId, IllegalGeneration => e
        @logger.error "Error sending heartbeat: #{e}"
        raise HeartbeatError, e
+     rescue RebalanceInProgress => e
+       @logger.warn "Error sending heartbeat: #{e}"
+       raise HeartbeatError, e
      rescue NotCoordinatorForGroup
        @logger.error "Failed to find coordinator for group `#{@group_id}`; retrying..."
        sleep 1
@@ -140,7 +148,11 @@ module Kafka
        response = coordinator.join_group(
          group_id: @group_id,
          session_timeout: @session_timeout,
+         rebalance_timeout: @rebalance_timeout,
          member_id: @member_id,
+         topics: @topics,
+         protocol_name: @assignor.protocol_name,
+         user_data: @assignor.user_data,
        )

        Protocol.handle_error(response.error_code)
@@ -158,6 +170,12 @@ module Kafka
        @member_id = ""
        sleep 1

+       retry
+     rescue CoordinatorLoadInProgress
+       @logger.error "Coordinator broker still loading, retrying in 1s..."
+
+       sleep 1
+
        retry
      end

@@ -171,9 +189,14 @@ module Kafka
        if group_leader?
          @logger.info "Chosen as leader of group `#{@group_id}`"

-         group_assignment = @assignment_strategy.assign(
-           members: @members.keys,
-           topics: @topics,
+         topics = Set.new
+         @members.each do |_member, metadata|
+           metadata.topics.each { |t| topics.add(t) }
+         end
+
+         group_assignment = @assignor.assign(
+           members: @members,
+           topics: topics,
          )
        end

data/lib/kafka/crc32_hash.rb ADDED
@@ -0,0 +1,15 @@
+ # frozen_string_literal: true
+
+ require "zlib"
+
+ module Kafka
+   class Crc32Hash
+
+     # crc32 is supported natively
+     def load; end
+
+     def hash(value)
+       Zlib.crc32(value)
+     end
+   end
+ end
data/lib/kafka/datadog.rb CHANGED
@@ -31,7 +31,7 @@ module Kafka

    class << self
      def statsd
-       @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags)
+       @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags, socket_path: socket_path)
      end

      def statsd=(statsd)
@@ -40,7 +40,7 @@ module Kafka
      end

      def host
-       @host ||= default_host
+       @host
      end

      def host=(host)
@@ -49,7 +49,7 @@ module Kafka
      end

      def port
-       @port ||= default_port
+       @port
      end

      def port=(port)
@@ -57,6 +57,15 @@ module Kafka
        clear
      end

+     def socket_path
+       @socket_path
+     end
+
+     def socket_path=(socket_path)
+       @socket_path = socket_path
+       clear
+     end
+
      def namespace
        @namespace ||= STATSD_NAMESPACE
      end
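
A configuration sketch using the new accessor; the socket path below is the Datadog agent's conventional default, not something this gem sets for you. Note that `host` and `port` no longer fall back to library defaults here; `Datadog::Statsd` applies its own defaults when they are nil:

```ruby
require "kafka/datadog"

# Ship metrics over the agent's Unix domain socket instead of UDP.
Kafka::Datadog.socket_path = "/var/run/datadog/dsd.socket"
```
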
@@ -77,14 +86,6 @@ module Kafka

      private

-     def default_host
-       ::Datadog::Statsd.const_defined?(:Connection) ? ::Datadog::Statsd::Connection::DEFAULT_HOST : ::Datadog::Statsd::DEFAULT_HOST
-     end
-
-     def default_port
-       ::Datadog::Statsd.const_defined?(:Connection) ? ::Datadog::Statsd::Connection::DEFAULT_PORT : ::Datadog::Statsd::DEFAULT_PORT
-     end
-
      def clear
        @statsd && @statsd.close
        @statsd = nil
@@ -95,8 +96,8 @@ module Kafka
    private

    %w[increment histogram count timing gauge].each do |type|
-     define_method(type) do |*args|
-       emit(type, *args)
+     define_method(type) do |*args, **kwargs|
+       emit(type, *args, **kwargs)
      end
    end

@@ -168,6 +169,8 @@ module Kafka
    def process_batch(event)
      offset = event.payload.fetch(:last_offset)
      messages = event.payload.fetch(:message_count)
+     create_time = event.payload.fetch(:last_create_time)
+     time_lag = create_time && ((Time.now - create_time) * 1000).to_i

      tags = {
        client: event.payload.fetch(:client_id),
@@ -184,6 +187,10 @@ module Kafka
      end

      gauge("consumer.offset", offset, tags: tags)
+
+     if time_lag
+       gauge("consumer.time_lag", time_lag, tags: tags)
+     end
    end

    def fetch_batch(event)
data/lib/kafka/digest.rb ADDED
@@ -0,0 +1,22 @@
+ # frozen_string_literal: true
+
+ require "kafka/crc32_hash"
+ require "kafka/murmur2_hash"
+
+ module Kafka
+   module Digest
+     FUNCTIONS_BY_NAME = {
+       :crc32 => Crc32Hash.new,
+       :murmur2 => Murmur2Hash.new
+     }.freeze
+
+     def self.find_digest(name)
+       digest = FUNCTIONS_BY_NAME.fetch(name) do
+         raise LoadError, "Unknown hash function #{name}"
+       end
+
+       digest.load
+       digest
+     end
+   end
+ end
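
This registry is what `Kafka::Partitioner` consults (see partitioner.rb below). A hedged sketch of opting into murmur2 hashing, which matches the Java client's partitioning; it assumes the client-level `partitioner:` option from this release, and requires adding `gem "digest-murmurhash"` to your Gemfile, as the loader in murmur2_hash.rb below enforces:

```ruby
require "kafka"

kafka = Kafka.new(
  ["kafka1:9092"],  # placeholder broker address
  partitioner: Kafka::Partitioner.new(hash_function: :murmur2)
)
```
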
data/lib/kafka/fetcher.rb CHANGED
@@ -4,7 +4,7 @@ require "kafka/fetch_operation"

  module Kafka
    class Fetcher
-     attr_reader :queue
+     attr_reader :queue, :max_wait_time

      def initialize(cluster:, logger:, instrumenter:, max_queue_size:, group:)
        @cluster = cluster
@@ -17,6 +17,9 @@ module Kafka
      @commands = Queue.new
      @next_offsets = Hash.new { |h, k| h[k] = {} }

+     # We are only running when someone calls start.
+     @running = false
+
      # Long poll until at least this many bytes can be fetched.
      @min_bytes = 1

@@ -110,7 +113,7 @@ module Kafka
      elsif @queue.size < @max_queue_size
        step
      else
-       @logger.warn "Reached max fetcher queue size (#{@max_queue_size}), sleeping 1s"
+       @logger.info "Reached max fetcher queue size (#{@max_queue_size}), sleeping 1s"
        sleep 1
      end
    ensure
data/lib/kafka/interceptors.rb ADDED
@@ -0,0 +1,33 @@
+ # frozen_string_literal: true
+
+ module Kafka
+   # Holds a list of interceptors that implement `call`
+   # and wraps calls to a chain of custom interceptors.
+   class Interceptors
+     def initialize(interceptors:, logger:)
+       @interceptors = interceptors || []
+       @logger = TaggedLogger.new(logger)
+     end
+
+     # This method is called when the client produces a message or once the batches are fetched.
+     # The message returned from the first call is passed to the second interceptor call, and so on in an
+     # interceptor chain. This method does not throw exceptions.
+     #
+     # @param intercepted [Kafka::PendingMessage || Kafka::FetchedBatch] the produced message or
+     #   fetched batch.
+     #
+     # @return [Kafka::PendingMessage || Kafka::FetchedBatch] the intercepted message or batch
+     #   returned by the last interceptor.
+     def call(intercepted)
+       @interceptors.each do |interceptor|
+         begin
+           intercepted = interceptor.call(intercepted)
+         rescue Exception => e
+           @logger.warn "Error executing interceptor for topic: #{intercepted.topic} partition: #{intercepted.partition}: #{e.message}\n#{e.backtrace.join("\n")}"
+         end
+       end
+
+       intercepted
+     end
+   end
+ end
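
A hedged sketch of an interceptor: any object responding to `call` and returning the (possibly modified) message or batch works, and because the chain rescues exceptions, a failing interceptor logs a warning instead of breaking produce or consume flows. The `interceptors:` options added to the producer and consumer in this release wire it in:

```ruby
class TimingInterceptor
  # For producers `intercepted` is a Kafka::PendingMessage; for consumers
  # it is a Kafka::FetchedBatch. Always return it, modified or not.
  def call(intercepted)
    puts "intercepted #{intercepted.topic} at #{Time.now}"
    intercepted
  end
end

# producer = kafka.producer(interceptors: [TimingInterceptor.new])
# consumer = kafka.consumer(group_id: "g", interceptors: [TimingInterceptor.new])
```
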
data/lib/kafka/murmur2_hash.rb ADDED
@@ -0,0 +1,17 @@
+ # frozen_string_literal: true
+
+ module Kafka
+   class Murmur2Hash
+     SEED = [0x9747b28c].pack('L')
+
+     def load
+       require 'digest/murmurhash'
+     rescue LoadError
+       raise LoadError, "using murmur2 hashing requires adding a dependency on the `digest-murmurhash` gem to your Gemfile."
+     end
+
+     def hash(value)
+       ::Digest::MurmurHash2.rawdigest(value, SEED) & 0x7fffffff
+     end
+   end
+ end
data/lib/kafka/offset_manager.rb CHANGED
@@ -50,9 +50,20 @@ module Kafka
    # @param offset [Integer] the offset of the message that should be marked as processed.
    # @return [nil]
    def mark_as_processed(topic, partition, offset)
-     @uncommitted_offsets += 1
+     unless @group.assigned_to?(topic, partition)
+       @logger.debug "Not marking #{topic}/#{partition}:#{offset} as processed for partition not assigned to this consumer."
+       return
+     end
      @processed_offsets[topic] ||= {}

+     last_processed_offset = @processed_offsets[topic][partition] || -1
+     if last_processed_offset > offset + 1
+       @logger.debug "Not overwriting newer offset #{topic}/#{partition}:#{last_processed_offset - 1} with older #{offset}"
+       return
+     end
+
+     @uncommitted_offsets += 1
+
      # The committed offset should always be the offset of the next message that the
      # application will read, thus adding one to the last message processed.
      @processed_offsets[topic][partition] = offset + 1
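
The effect of the two guards above, in terms of the public consumer API: with manual marking, a stale or out-of-order `mark_message_as_processed` call no longer rewinds the committed position, and marks for partitions this consumer doesn't own are ignored. A sketch assuming a hypothetical `process` helper:

```ruby
consumer.each_message(automatically_mark_as_processed: false) do |message|
  process(message)  # hypothetical application code
  # Commits offset + 1; older offsets and unassigned partitions are ignored.
  consumer.mark_message_as_processed(message)
end
```
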
data/lib/kafka/partitioner.rb CHANGED
@@ -1,11 +1,16 @@
  # frozen_string_literal: true

- require "zlib"
+ require "kafka/digest"

  module Kafka

    # Assigns partitions to messages.
    class Partitioner
+     # @param hash_function [Symbol, nil] the algorithm used to compute a message's
+     #   destination partition. Default is :crc32
+     def initialize(hash_function: nil)
+       @digest = Digest.find_digest(hash_function || :crc32)
+     end

      # Assigns a partition number based on a partition key. If no explicit
      # partition key is provided, the message key will be used instead.
@@ -19,7 +24,7 @@ module Kafka
    # @param message [Kafka::PendingMessage] the message that should be assigned
    #   a partition.
    # @return [Integer] the partition number.
-   def self.partition_for_key(partition_count, message)
+   def call(partition_count, message)
      raise ArgumentError if partition_count == 0

      # If no explicit partition key is specified we use the message key instead.
@@ -28,7 +33,7 @@ module Kafka
      if key.nil?
        rand(partition_count)
      else
-       Zlib.crc32(key) % partition_count
+       @digest.hash(key) % partition_count
      end
    end
  end
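
Since the partitioner is now an instance invoked as `call(partition_count, message)` (see producer.rb below), any object with that signature can be swapped in via the client's `partitioner:` option. A hedged sketch that routes on a numeric suffix in the partition key:

```ruby
class SuffixPartitioner
  def call(partition_count, message)
    key = message.partition_key || message.key
    key ? key[/\d+/].to_i % partition_count : rand(partition_count)
  end
end

# kafka = Kafka.new(["kafka1:9092"], partitioner: SuffixPartitioner.new)
```
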
data/lib/kafka/producer.rb CHANGED
@@ -7,6 +7,7 @@ require "kafka/produce_operation"
  require "kafka/pending_message_queue"
  require "kafka/pending_message"
  require "kafka/compressor"
+ require "kafka/interceptors"

  module Kafka
    # Allows sending messages to a Kafka cluster.
@@ -129,7 +130,9 @@ module Kafka
    class Producer
      class AbortTransaction < StandardError; end

-     def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
+     def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:,
+                    required_acks:, max_retries:, retry_backoff:, max_buffer_size:,
+                    max_buffer_bytesize:, partitioner:, interceptors: [])
        @cluster = cluster
        @transaction_manager = transaction_manager
        @logger = TaggedLogger.new(logger)
@@ -141,6 +144,8 @@ module Kafka
        @max_buffer_size = max_buffer_size
        @max_buffer_bytesize = max_buffer_bytesize
        @compressor = compressor
+       @partitioner = partitioner
+       @interceptors = Interceptors.new(interceptors: interceptors, logger: logger)

        # The set of topics that are produced to.
        @target_topics = Set.new
@@ -188,15 +193,18 @@ module Kafka
    # @raise [BufferOverflow] if the maximum buffer size has been reached.
    # @return [nil]
    def produce(value, key: nil, headers: {}, topic:, partition: nil, partition_key: nil, create_time: Time.now)
-     message = PendingMessage.new(
+     # We want to fail fast if `topic` isn't a String
+     topic = topic.to_str
+
+     message = @interceptors.call(PendingMessage.new(
        value: value && value.to_s,
        key: key && key.to_s,
        headers: headers,
-       topic: topic.to_s,
+       topic: topic,
        partition: partition && Integer(partition),
        partition_key: partition_key && partition_key.to_s,
        create_time: create_time
-     )
+     ))

      if buffer_size >= @max_buffer_size
        buffer_overflow topic,
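
The `to_str` change makes non-String topics fail fast: previously `topic.to_s` silently coerced any object (a `nil` topic became `""`), whereas only String-like objects responding to `#to_str` are accepted now. For example:

```ruby
producer.produce("hello", topic: "greetings")  # fine
producer.produce("hello", topic: :greetings)   # now raises NoMethodError
```
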
@@ -452,7 +460,7 @@ module Kafka

      if partition.nil?
        partition_count = @cluster.partitions_for(message.topic).count
-       partition = Partitioner.partition_for_key(partition_count, message)
+       partition = @partitioner.call(partition_count, message)
      end

      @buffer.write(