ruby-kafka 1.2.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/kafka/cluster.rb CHANGED
@@ -1,6 +1,7 @@
  # frozen_string_literal: true
 
  require "kafka/broker_pool"
+ require "resolv"
  require "set"
 
  module Kafka
@@ -18,7 +19,8 @@ module Kafka
    # @param seed_brokers [Array<URI>]
    # @param broker_pool [Kafka::BrokerPool]
    # @param logger [Logger]
-   def initialize(seed_brokers:, broker_pool:, logger:)
+   # @param resolve_seed_brokers [Boolean] See {Kafka::Client#initialize}
+   def initialize(seed_brokers:, broker_pool:, logger:, resolve_seed_brokers: false)
      if seed_brokers.empty?
        raise ArgumentError, "At least one seed broker must be configured"
      end
@@ -26,6 +28,7 @@ module Kafka
      @logger = TaggedLogger.new(logger)
      @seed_brokers = seed_brokers
      @broker_pool = broker_pool
+     @resolve_seed_brokers = resolve_seed_brokers
      @cluster_info = nil
      @stale = true
 
@@ -117,7 +120,7 @@ module Kafka
 
    # Finds the broker acting as the coordinator of the given group.
    #
-   # @param group_id: [String]
+   # @param group_id [String]
    # @return [Broker] the broker that's currently coordinator.
    def get_group_coordinator(group_id:)
      @logger.debug "Getting group coordinator for `#{group_id}`"
@@ -127,7 +130,7 @@ module Kafka
 
    # Finds the broker acting as the coordinator of the given transaction.
    #
-   # @param transactional_id: [String]
+   # @param transactional_id [String]
    # @return [Broker] the broker that's currently coordinator.
    def get_transaction_coordinator(transactional_id:)
      @logger.debug "Getting transaction coordinator for `#{transactional_id}`"
@@ -418,32 +421,35 @@ module Kafka
    # @return [Protocol::MetadataResponse] the cluster metadata.
    def fetch_cluster_info
      errors = []
-
      @seed_brokers.shuffle.each do |node|
-       @logger.info "Fetching cluster metadata from #{node}"
-
-       begin
-         broker = @broker_pool.connect(node.hostname, node.port)
-         cluster_info = broker.fetch_metadata(topics: @target_topics)
-
-         if cluster_info.brokers.empty?
-           @logger.error "No brokers in cluster"
-         else
-           @logger.info "Discovered cluster metadata; nodes: #{cluster_info.brokers.join(', ')}"
-
-           @stale = false
-
-           return cluster_info
+       (@resolve_seed_brokers ? Resolv.getaddresses(node.hostname).shuffle : [node.hostname]).each do |hostname_or_ip|
+         node_info = node.to_s
+         node_info << " (#{hostname_or_ip})" if node.hostname != hostname_or_ip
+         @logger.info "Fetching cluster metadata from #{node_info}"
+
+         begin
+           broker = @broker_pool.connect(hostname_or_ip, node.port)
+           cluster_info = broker.fetch_metadata(topics: @target_topics)
+
+           if cluster_info.brokers.empty?
+             @logger.error "No brokers in cluster"
+           else
+             @logger.info "Discovered cluster metadata; nodes: #{cluster_info.brokers.join(', ')}"
+
+             @stale = false
+
+             return cluster_info
+           end
+         rescue Error => e
+           @logger.error "Failed to fetch metadata from #{node_info}: #{e}"
+           errors << [node_info, e]
+         ensure
+           broker.disconnect unless broker.nil?
          end
-       rescue Error => e
-         @logger.error "Failed to fetch metadata from #{node}: #{e}"
-         errors << [node, e]
-       ensure
-         broker.disconnect unless broker.nil?
        end
      end
 
-     error_description = errors.map {|node, exception| "- #{node}: #{exception}" }.join("\n")
+     error_description = errors.map {|node_info, exception| "- #{node_info}: #{exception}" }.join("\n")
 
      raise ConnectionError, "Could not connect to any of the seed brokers:\n#{error_description}"
    end
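
The change above threads a new `resolve_seed_brokers` option from the client down to Kafka::Cluster: when enabled, each seed broker hostname is expanded via Resolv.getaddresses and the resulting addresses are tried in random order while fetching cluster metadata, which helps when a single DNS name fronts several brokers. A minimal usage sketch, assuming the flag is passed through Kafka.new as the "See {Kafka::Client#initialize}" reference suggests (broker addresses are placeholders):

    require "kafka"

    # With resolve_seed_brokers: true, a seed host backed by several A records
    # is resolved and each address is tried when fetching cluster metadata.
    kafka = Kafka.new(
      ["seed-brokers.example.com:9092"],
      client_id: "my-app",
      resolve_seed_brokers: true
    )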
@@ -0,0 +1,63 @@
+ # frozen_string_literal: true
+
+ require "kafka/protocol/member_assignment"
+
+ module Kafka
+   class ConsumerGroup
+
+     # A consumer group partition assignor
+     class Assignor
+       Partition = Struct.new(:topic, :partition_id)
+
+       # @param cluster [Kafka::Cluster]
+       # @param strategy [Object] an object that implements #protocol_type,
+       #   #user_data, and #assign.
+       def initialize(cluster:, strategy:)
+         @cluster = cluster
+         @strategy = strategy
+       end
+
+       def protocol_name
+         @strategy.respond_to?(:protocol_name) ? @strategy.protocol_name : @strategy.class.to_s
+       end
+
+       def user_data
+         @strategy.user_data if @strategy.respond_to?(:user_data)
+       end
+
+       # Assign the topic partitions to the group members.
+       #
+       # @param members [Hash<String, Kafka::Protocol::JoinGroupResponse::Metadata>] a hash
+       #   mapping member ids to metadata.
+       # @param topics [Array<String>] topics
+       # @return [Hash<String, Kafka::Protocol::MemberAssignment>] a hash mapping member
+       #   ids to assignments.
+       def assign(members:, topics:)
+         topic_partitions = topics.flat_map do |topic|
+           begin
+             partition_ids = @cluster.partitions_for(topic).map(&:partition_id)
+           rescue UnknownTopicOrPartition
+             raise UnknownTopicOrPartition, "unknown topic #{topic}"
+           end
+           partition_ids.map {|partition_id| Partition.new(topic, partition_id) }
+         end
+
+         group_assignment = {}
+
+         members.each_key do |member_id|
+           group_assignment[member_id] = Protocol::MemberAssignment.new
+         end
+         @strategy.call(cluster: @cluster, members: members, partitions: topic_partitions).each do |member_id, partitions|
+           Array(partitions).each do |partition|
+             group_assignment[member_id].assign(partition.topic, [partition.partition_id])
+           end
+         end
+
+         group_assignment
+       rescue Kafka::LeaderNotAvailable
+         sleep 1
+         retry
+       end
+     end
+   end
+ end
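
The new Kafka::ConsumerGroup::Assignor turns partition assignment into a pluggable strategy: the wrapped object only needs to respond to #call(cluster:, members:, partitions:) and return a hash of member id to partitions, while #protocol_name and #user_data are optional. A sketch of a custom strategy under those assumptions; the class name and grouping rule here are illustrative only, not part of the gem:

    # Illustrative strategy: give all partitions of a topic to the member whose
    # index matches the topic's hash. A real strategy should also respect each
    # member's subscription (metadata.topics), as the bundled round-robin one does.
    class TopicAffinityStrategy
      def protocol_name
        "topic-affinity"
      end

      def user_data
        nil
      end

      # members:    member id => Kafka::Protocol::JoinGroupResponse::Metadata
      # partitions: Array of Kafka::ConsumerGroup::Assignor::Partition
      def call(cluster:, members:, partitions:)
        member_ids = members.keys
        partitions.group_by { |partition| member_ids[partition.topic.hash % member_ids.size] }
      end
    end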
@@ -1,13 +1,14 @@
  # frozen_string_literal: true
 
  require "set"
+ require "kafka/consumer_group/assignor"
  require "kafka/round_robin_assignment_strategy"
 
  module Kafka
    class ConsumerGroup
      attr_reader :assigned_partitions, :generation_id, :group_id
 
-     def initialize(cluster:, logger:, group_id:, session_timeout:, rebalance_timeout:, retention_time:, instrumenter:)
+     def initialize(cluster:, logger:, group_id:, session_timeout:, rebalance_timeout:, retention_time:, instrumenter:, assignment_strategy:)
        @cluster = cluster
        @logger = TaggedLogger.new(logger)
        @group_id = group_id
@@ -19,7 +20,10 @@ module Kafka
        @members = {}
        @topics = Set.new
        @assigned_partitions = {}
-       @assignment_strategy = RoundRobinAssignmentStrategy.new(cluster: @cluster)
+       @assignor = Assignor.new(
+         cluster: cluster,
+         strategy: assignment_strategy || RoundRobinAssignmentStrategy.new
+       )
        @retention_time = retention_time
      end
 
@@ -113,9 +117,12 @@ module Kafka
 
        Protocol.handle_error(response.error_code)
      end
-   rescue ConnectionError, UnknownMemberId, RebalanceInProgress, IllegalGeneration => e
+   rescue ConnectionError, UnknownMemberId, IllegalGeneration => e
      @logger.error "Error sending heartbeat: #{e}"
      raise HeartbeatError, e
+   rescue RebalanceInProgress => e
+     @logger.warn "Error sending heartbeat: #{e}"
+     raise HeartbeatError, e
    rescue NotCoordinatorForGroup
      @logger.error "Failed to find coordinator for group `#{@group_id}`; retrying..."
      sleep 1
@@ -144,6 +151,8 @@ module Kafka
        rebalance_timeout: @rebalance_timeout,
        member_id: @member_id,
        topics: @topics,
+       protocol_name: @assignor.protocol_name,
+       user_data: @assignor.user_data,
      )
 
      Protocol.handle_error(response.error_code)
@@ -180,9 +189,14 @@ module Kafka
      if group_leader?
        @logger.info "Chosen as leader of group `#{@group_id}`"
 
-       group_assignment = @assignment_strategy.assign(
-         members: @members.keys,
-         topics: @topics,
+       topics = Set.new
+       @members.each do |_member, metadata|
+         metadata.topics.each { |t| topics.add(t) }
+       end
+
+       group_assignment = @assignor.assign(
+         members: @members,
+         topics: topics,
        )
      end
 
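
ConsumerGroup now takes an assignment_strategy: argument and wraps it in the Assignor, falling back to the bundled round-robin strategy when nil is given. At the application level the strategy is expected to be supplied when the consumer is created; a hedged sketch, assuming Kafka::Client#consumer forwards an assignment_strategy: keyword (check the gem's README for the exact signature):

    kafka = Kafka.new(["kafka1:9092"], client_id: "my-app")

    # nil keeps the default round-robin assignment; an object such as the
    # TopicAffinityStrategy sketched above overrides it.
    consumer = kafka.consumer(
      group_id: "my-group",
      assignment_strategy: TopicAffinityStrategy.new
    )
    consumer.subscribe("page-views")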
@@ -0,0 +1,15 @@
+ # frozen_string_literal: true
+
+ require "zlib"
+
+ module Kafka
+   class Crc32Hash
+
+     # crc32 is supported natively
+     def load; end
+
+     def hash(value)
+       Zlib.crc32(value)
+     end
+   end
+ end
@@ -0,0 +1,22 @@
+ # frozen_string_literal: true
+
+ require "kafka/crc32_hash"
+ require "kafka/murmur2_hash"
+
+ module Kafka
+   module Digest
+     FUNCTIONS_BY_NAME = {
+       :crc32 => Crc32Hash.new,
+       :murmur2 => Murmur2Hash.new
+     }.freeze
+
+     def self.find_digest(name)
+       digest = FUNCTIONS_BY_NAME.fetch(name) do
+         raise LoadError, "Unknown hash function #{name}"
+       end
+
+       digest.load
+       digest
+     end
+   end
+ end
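
Kafka::Digest is a small registry that maps a symbol to a hash implementation and lets each implementation pull in its own dependency lazily via #load. Based on the code above:

    require "kafka/digest"

    Kafka::Digest.find_digest(:crc32).hash("some-key")    # CRC32 of the key, no extra gems needed
    Kafka::Digest.find_digest(:murmur2).hash("some-key")  # raises LoadError unless digest-murmurhash is installed
    Kafka::Digest.find_digest(:md5)                       # raises LoadError ("Unknown hash function md5")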
@@ -0,0 +1,17 @@
+ # frozen_string_literal: true
+
+ module Kafka
+   class Murmur2Hash
+     SEED = [0x9747b28c].pack('L')
+
+     def load
+       require 'digest/murmurhash'
+     rescue LoadError
+       raise LoadError, "using murmur2 hashing requires adding a dependency on the `digest-murmurhash` gem to your Gemfile."
+     end
+
+     def hash(value)
+       ::Digest::MurmurHash2.rawdigest(value, SEED) & 0x7fffffff
+     end
+   end
+ end
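
The murmur2 implementation is deliberately optional: digest-murmurhash is only required when #load is called, so applications that want :murmur2 hashing must declare the extra gem themselves, for example:

    # Gemfile
    gem "ruby-kafka"
    gem "digest-murmurhash"  # only needed when hash_function: :murmur2 is used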
@@ -1,11 +1,16 @@
  # frozen_string_literal: true
 
- require "zlib"
+ require "kafka/digest"
 
  module Kafka
 
    # Assigns partitions to messages.
    class Partitioner
+     # @param hash_function [Symbol, nil] the algorithm used to compute a messages
+     #   destination partition. Default is :crc32
+     def initialize(hash_function: nil)
+       @digest = Digest.find_digest(hash_function || :crc32)
+     end
 
      # Assigns a partition number based on a partition key. If no explicit
      # partition key is provided, the message key will be used instead.
@@ -28,7 +33,7 @@ module Kafka
        if key.nil?
          rand(partition_count)
        else
-         Zlib.crc32(key) % partition_count
+         @digest.hash(key) % partition_count
        end
      end
    end
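
Partitioner keeps :crc32 as the default, so existing keys continue to land on the same partitions; :murmur2 is opt-in and uses the same seed and sign-masking as the Java client's default partitioner. A sketch, assuming the public method is #call(partition_count, message) as in recent versions of the gem and that the client accepts a partitioner: option (both are assumptions to verify against the README):

    partitioner = Kafka::Partitioner.new(hash_function: :murmur2)

    # Hypothetical message-like object; only #partition_key and #key are consulted.
    Message = Struct.new(:key, :partition_key)
    partitioner.call(6, Message.new("user-42", nil))  # => a partition index in 0..5

    kafka = Kafka.new(
      ["kafka1:9092"],
      partitioner: Kafka::Partitioner.new(hash_function: :murmur2)
    )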
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  module Kafka
    module Protocol
      class AddOffsetsToTxnResponse
@@ -126,7 +126,7 @@ module Kafka
    # Writes an integer under varints serializing to the IO object.
    # https://developers.google.com/protocol-buffers/docs/encoding#varints
    #
-   # @param string [Integer]
+   # @param int [Integer]
    # @return [nil]
    def write_varint(int)
      int = int << 1
@@ -7,14 +7,14 @@ module Kafka
    class JoinGroupRequest
      PROTOCOL_TYPE = "consumer"
 
-     def initialize(group_id:, session_timeout:, rebalance_timeout:, member_id:, topics: [])
+     def initialize(group_id:, session_timeout:, rebalance_timeout:, member_id:, topics: [], protocol_name:, user_data: nil)
        @group_id = group_id
        @session_timeout = session_timeout * 1000 # Kafka wants ms.
        @rebalance_timeout = rebalance_timeout * 1000 # Kafka wants ms.
        @member_id = member_id || ""
        @protocol_type = PROTOCOL_TYPE
        @group_protocols = {
-         "roundrobin" => ConsumerGroupProtocol.new(topics: topics),
+         protocol_name => ConsumerGroupProtocol.new(topics: topics, user_data: user_data),
        }
      end
 
@@ -3,6 +3,8 @@
  module Kafka
    module Protocol
      class JoinGroupResponse
+       Metadata = Struct.new(:version, :topics, :user_data)
+
        attr_reader :error_code
 
        attr_reader :generation_id, :group_protocol
@@ -25,7 +27,13 @@ module Kafka
          group_protocol: decoder.string,
          leader_id: decoder.string,
          member_id: decoder.string,
-         members: Hash[decoder.array { [decoder.string, decoder.bytes] }],
+         members: Hash[
+           decoder.array do
+             member_id = decoder.string
+             d = Decoder.from_string(decoder.bytes)
+             [member_id, Metadata.new(d.int16, d.array { d.string }, d.bytes)]
+           end
+         ],
        )
      end
    end
@@ -77,7 +77,7 @@ module Kafka
        record_batch_encoder.write_int8(MAGIC_BYTE)
 
        body = encode_record_batch_body
-       crc = Digest::CRC32c.checksum(body)
+       crc = ::Digest::CRC32c.checksum(body)
 
        record_batch_encoder.write_int32(crc)
        record_batch_encoder.write(body)
@@ -213,7 +213,7 @@ module Kafka
      end
 
      def mark_control_record
-       if in_transaction && is_control_batch
+       if is_control_batch
          record = @records.first
          record.is_control_record = true unless record.nil?
        end
@@ -8,7 +8,7 @@ module Kafka
 
    class SaslHandshakeRequest
 
-     SUPPORTED_MECHANISMS = %w(GSSAPI PLAIN SCRAM-SHA-256 SCRAM-SHA-512 OAUTHBEARER)
+     SUPPORTED_MECHANISMS = %w(AWS_MSK_IAM GSSAPI PLAIN SCRAM-SHA-256 SCRAM-SHA-512 OAUTHBEARER)
 
      def initialize(mechanism)
        unless SUPPORTED_MECHANISMS.include?(mechanism)
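
Adding AWS_MSK_IAM to the supported SASL mechanisms is what enables IAM-based authentication against Amazon MSK. A configuration sketch; the option names below are recalled from the ruby-kafka README and should be treated as assumptions, and the broker address is a placeholder:

    kafka = Kafka.new(
      ["b-1.mycluster.kafka.us-east-1.amazonaws.com:9098"],
      ssl_ca_certs_from_system: true,
      sasl_aws_msk_iam_access_key_id: ENV["AWS_ACCESS_KEY_ID"],
      sasl_aws_msk_iam_secret_key_id: ENV["AWS_SECRET_ACCESS_KEY"],
      sasl_aws_msk_iam_aws_region: "us-east-1"
    )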
@@ -13,9 +13,12 @@ module Kafka
      end
 
      def self.decode(decoder)
+       error_code = decoder.int16
+       member_assignment_bytes = decoder.bytes
+
        new(
-         error_code: decoder.int16,
-         member_assignment: MemberAssignment.decode(Decoder.from_string(decoder.bytes)),
+         error_code: error_code,
+         member_assignment: member_assignment_bytes ? MemberAssignment.decode(Decoder.from_string(member_assignment_bytes)) : nil
        )
      end
    end
@@ -1,17 +1,46 @@
+ # frozen_string_literal: true
+
  module Kafka
    module Protocol
      class TxnOffsetCommitResponse
+       class PartitionError
+         attr_reader :partition, :error_code
+
+         def initialize(partition:, error_code:)
+           @partition = partition
+           @error_code = error_code
+         end
+       end
+
+       class TopicPartitionsError
+         attr_reader :topic, :partitions
+
+         def initialize(topic:, partitions:)
+           @topic = topic
+           @partitions = partitions
+         end
+       end
 
-       attr_reader :error_code
+       attr_reader :errors
 
-       def initialize(error_code:)
-         @error_code = error_code
+       def initialize(errors:)
+         @errors = errors
        end
 
        def self.decode(decoder)
          _throttle_time_ms = decoder.int32
-         error_code = decoder.int16
-         new(error_code: error_code)
+         errors = decoder.array do
+           TopicPartitionsError.new(
+             topic: decoder.string,
+             partitions: decoder.array do
+               PartitionError.new(
+                 partition: decoder.int32,
+                 error_code: decoder.int16
+               )
+             end
+           )
+         end
+         new(errors: errors)
        end
      end
    end
@@ -1,54 +1,52 @@
- # frozen_string_literal: true
-
- require "kafka/protocol/member_assignment"
-
  module Kafka
 
-   # A consumer group partition assignment strategy that assigns partitions to
-   # consumers in a round-robin fashion.
+   # A round robin assignment strategy inpired on the
+   # original java client round robin assignor. It's capable
+   # of handling identical as well as different topic subscriptions
+   # accross the same consumer group.
    class RoundRobinAssignmentStrategy
-     def initialize(cluster:)
-       @cluster = cluster
+     def protocol_name
+       "roundrobin"
      end
 
      # Assign the topic partitions to the group members.
      #
-     # @param members [Array<String>] member ids
-     # @param topics [Array<String>] topics
-     # @return [Hash<String, Protocol::MemberAssignment>] a hash mapping member
-     #   ids to assignments.
-     def assign(members:, topics:)
-       group_assignment = {}
-
-       members.each do |member_id|
-         group_assignment[member_id] = Protocol::MemberAssignment.new
-       end
-
-       topic_partitions = topics.flat_map do |topic|
-         begin
-           partitions = @cluster.partitions_for(topic).map(&:partition_id)
-         rescue UnknownTopicOrPartition
-           raise UnknownTopicOrPartition, "unknown topic #{topic}"
+     # @param cluster [Kafka::Cluster]
+     # @param members [Hash<String, Kafka::Protocol::JoinGroupResponse::Metadata>] a hash
+     #   mapping member ids to metadata
+     # @param partitions [Array<Kafka::ConsumerGroup::Assignor::Partition>] a list of
+     #   partitions the consumer group processes
+     # @return [Hash<String, Array<Kafka::ConsumerGroup::Assignor::Partition>] a hash
+     #   mapping member ids to partitions.
+     def call(cluster:, members:, partitions:)
+       partitions_per_member = Hash.new {|h, k| h[k] = [] }
+       relevant_partitions = valid_sorted_partitions(members, partitions)
+       members_ids = members.keys
+       iterator = (0...members.size).cycle
+       idx = iterator.next
+
+       relevant_partitions.each do |partition|
+         topic = partition.topic
+
+         while !members[members_ids[idx]].topics.include?(topic)
+           idx = iterator.next
          end
-         Array.new(partitions.count) { topic }.zip(partitions)
+
+         partitions_per_member[members_ids[idx]] << partition
+         idx = iterator.next
        end
 
-       partitions_per_member = topic_partitions.group_by.with_index do |_, index|
-         index % members.count
-       end.values
+       partitions_per_member
+     end
 
-       members.zip(partitions_per_member).each do |member_id, member_partitions|
-         unless member_partitions.nil?
-           member_partitions.each do |topic, partition|
-             group_assignment[member_id].assign(topic, [partition])
-           end
-         end
-       end
+     def valid_sorted_partitions(members, partitions)
+       subscribed_topics = members.map do |id, metadata|
+         metadata && metadata.topics
+       end.flatten.compact
 
-       group_assignment
-     rescue Kafka::LeaderNotAvailable
-       sleep 1
-       retry
+       partitions
+         .select { |partition| subscribed_topics.include?(partition.topic) }
+         .sort_by { |partition| partition.topic }
      end
    end
  end
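
The rewritten round-robin strategy works from each member's own subscription metadata, so a partition is only ever handed to a member that subscribed to its topic. A small illustration of the new #call contract using plain Structs as stand-ins for the real protocol objects (only #topics, #topic and #partition_id are used):

    strategy = Kafka::RoundRobinAssignmentStrategy.new

    Metadata  = Struct.new(:topics)                # stand-in for JoinGroupResponse::Metadata
    Partition = Struct.new(:topic, :partition_id)  # stand-in for Assignor::Partition

    members = {
      "member-a" => Metadata.new(["orders"]),
      "member-b" => Metadata.new(["orders", "payments"]),
    }
    partitions = [
      Partition.new("orders", 0), Partition.new("orders", 1),
      Partition.new("payments", 0),
    ]

    strategy.call(cluster: nil, members: members, partitions: partitions)
    # => roughly {"member-a" => [orders/0], "member-b" => [orders/1, payments/0]}
    # The "payments" partition only goes to member-b, since member-a did not subscribe to that topic.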