ruby-kafka 1.2.0 → 1.5.0

data/lib/kafka/cluster.rb CHANGED
@@ -1,6 +1,7 @@
  # frozen_string_literal: true

  require "kafka/broker_pool"
+ require "resolv"
  require "set"

  module Kafka
@@ -18,7 +19,8 @@ module Kafka
  # @param seed_brokers [Array<URI>]
  # @param broker_pool [Kafka::BrokerPool]
  # @param logger [Logger]
- def initialize(seed_brokers:, broker_pool:, logger:)
+ # @param resolve_seed_brokers [Boolean] See {Kafka::Client#initialize}
+ def initialize(seed_brokers:, broker_pool:, logger:, resolve_seed_brokers: false)
  if seed_brokers.empty?
  raise ArgumentError, "At least one seed broker must be configured"
  end
@@ -26,6 +28,7 @@ module Kafka
  @logger = TaggedLogger.new(logger)
  @seed_brokers = seed_brokers
  @broker_pool = broker_pool
+ @resolve_seed_brokers = resolve_seed_brokers
  @cluster_info = nil
  @stale = true

@@ -117,7 +120,7 @@ module Kafka

  # Finds the broker acting as the coordinator of the given group.
  #
- # @param group_id: [String]
+ # @param group_id [String]
  # @return [Broker] the broker that's currently coordinator.
  def get_group_coordinator(group_id:)
  @logger.debug "Getting group coordinator for `#{group_id}`"
@@ -127,7 +130,7 @@ module Kafka

  # Finds the broker acting as the coordinator of the given transaction.
  #
- # @param transactional_id: [String]
+ # @param transactional_id [String]
  # @return [Broker] the broker that's currently coordinator.
  def get_transaction_coordinator(transactional_id:)
  @logger.debug "Getting transaction coordinator for `#{transactional_id}`"
@@ -418,32 +421,35 @@ module Kafka
  # @return [Protocol::MetadataResponse] the cluster metadata.
  def fetch_cluster_info
  errors = []
-
  @seed_brokers.shuffle.each do |node|
- @logger.info "Fetching cluster metadata from #{node}"
-
- begin
- broker = @broker_pool.connect(node.hostname, node.port)
- cluster_info = broker.fetch_metadata(topics: @target_topics)
-
- if cluster_info.brokers.empty?
- @logger.error "No brokers in cluster"
- else
- @logger.info "Discovered cluster metadata; nodes: #{cluster_info.brokers.join(', ')}"
-
- @stale = false
-
- return cluster_info
+ (@resolve_seed_brokers ? Resolv.getaddresses(node.hostname).shuffle : [node.hostname]).each do |hostname_or_ip|
+ node_info = node.to_s
+ node_info << " (#{hostname_or_ip})" if node.hostname != hostname_or_ip
+ @logger.info "Fetching cluster metadata from #{node_info}"
+
+ begin
+ broker = @broker_pool.connect(hostname_or_ip, node.port)
+ cluster_info = broker.fetch_metadata(topics: @target_topics)
+
+ if cluster_info.brokers.empty?
+ @logger.error "No brokers in cluster"
+ else
+ @logger.info "Discovered cluster metadata; nodes: #{cluster_info.brokers.join(', ')}"
+
+ @stale = false
+
+ return cluster_info
+ end
+ rescue Error => e
+ @logger.error "Failed to fetch metadata from #{node_info}: #{e}"
+ errors << [node_info, e]
+ ensure
+ broker.disconnect unless broker.nil?
  end
- rescue Error => e
- @logger.error "Failed to fetch metadata from #{node}: #{e}"
- errors << [node, e]
- ensure
- broker.disconnect unless broker.nil?
  end
  end

- error_description = errors.map {|node, exception| "- #{node}: #{exception}" }.join("\n")
+ error_description = errors.map {|node_info, exception| "- #{node_info}: #{exception}" }.join("\n")

  raise ConnectionError, "Could not connect to any of the seed brokers:\n#{error_description}"
  end
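
The new resolve_seed_brokers flag is plumbed from the client into the cluster above. A minimal sketch of opting in from application code, assuming the option is exposed on Kafka.new as the doc comment's reference to Kafka::Client#initialize suggests (hostname is illustrative):

require "kafka"

# Sketch only: with resolve_seed_brokers enabled, each seed hostname is resolved to all
# of its addresses (Resolv.getaddresses) and the addresses are tried in random order.
kafka = Kafka.new(
  ["kafka.example.com:9092"],   # hypothetical seed broker behind a multi-A DNS record
  client_id: "my-app",
  resolve_seed_brokers: true    # new option; defaults to false
)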

data/lib/kafka/consumer_group/assignor.rb ADDED
@@ -0,0 +1,63 @@
+ # frozen_string_literal: true
+
+ require "kafka/protocol/member_assignment"
+
+ module Kafka
+ class ConsumerGroup
+
+ # A consumer group partition assignor
+ class Assignor
+ Partition = Struct.new(:topic, :partition_id)
+
+ # @param cluster [Kafka::Cluster]
+ # @param strategy [Object] an object that implements #call, and optionally
+ # #protocol_name and #user_data.
+ def initialize(cluster:, strategy:)
+ @cluster = cluster
+ @strategy = strategy
+ end
+
+ def protocol_name
+ @strategy.respond_to?(:protocol_name) ? @strategy.protocol_name : @strategy.class.to_s
+ end
+
+ def user_data
+ @strategy.user_data if @strategy.respond_to?(:user_data)
+ end
+
+ # Assign the topic partitions to the group members.
+ #
+ # @param members [Hash<String, Kafka::Protocol::JoinGroupResponse::Metadata>] a hash
+ # mapping member ids to metadata.
+ # @param topics [Array<String>] topics
+ # @return [Hash<String, Kafka::Protocol::MemberAssignment>] a hash mapping member
+ # ids to assignments.
+ def assign(members:, topics:)
+ topic_partitions = topics.flat_map do |topic|
+ begin
+ partition_ids = @cluster.partitions_for(topic).map(&:partition_id)
+ rescue UnknownTopicOrPartition
+ raise UnknownTopicOrPartition, "unknown topic #{topic}"
+ end
+ partition_ids.map {|partition_id| Partition.new(topic, partition_id) }
+ end
+
+ group_assignment = {}
+
+ members.each_key do |member_id|
+ group_assignment[member_id] = Protocol::MemberAssignment.new
+ end
+ @strategy.call(cluster: @cluster, members: members, partitions: topic_partitions).each do |member_id, partitions|
+ Array(partitions).each do |partition|
+ group_assignment[member_id].assign(partition.topic, [partition.partition_id])
+ end
+ end
+
+ group_assignment
+ rescue Kafka::LeaderNotAvailable
+ sleep 1
+ retry
+ end
+ end
+ end
+ end
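
The assignor treats the strategy as a plain object: #call is required, while #protocol_name and #user_data are optional. A hypothetical strategy satisfying that interface:

# Hypothetical custom strategy (name and logic are illustrative, not part of the gem).
class EvenSpreadStrategy
  def protocol_name
    "even_spread"
  end

  def user_data
    nil
  end

  # members is a hash of member id => JoinGroupResponse::Metadata,
  # partitions is an array of Assignor::Partition structs.
  def call(cluster:, members:, partitions:)
    assignments = Hash.new { |h, k| h[k] = [] }
    member_ids = members.keys
    partitions.each_with_index do |partition, i|
      assignments[member_ids[i % member_ids.size]] << partition
    end
    assignments
  end
end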

data/lib/kafka/consumer_group.rb CHANGED
@@ -1,13 +1,14 @@
  # frozen_string_literal: true

  require "set"
+ require "kafka/consumer_group/assignor"
  require "kafka/round_robin_assignment_strategy"

  module Kafka
  class ConsumerGroup
  attr_reader :assigned_partitions, :generation_id, :group_id

- def initialize(cluster:, logger:, group_id:, session_timeout:, rebalance_timeout:, retention_time:, instrumenter:)
+ def initialize(cluster:, logger:, group_id:, session_timeout:, rebalance_timeout:, retention_time:, instrumenter:, assignment_strategy:)
  @cluster = cluster
  @logger = TaggedLogger.new(logger)
  @group_id = group_id
@@ -19,7 +20,10 @@ module Kafka
  @members = {}
  @topics = Set.new
  @assigned_partitions = {}
- @assignment_strategy = RoundRobinAssignmentStrategy.new(cluster: @cluster)
+ @assignor = Assignor.new(
+ cluster: cluster,
+ strategy: assignment_strategy || RoundRobinAssignmentStrategy.new
+ )
  @retention_time = retention_time
  end

@@ -113,9 +117,12 @@ module Kafka

  Protocol.handle_error(response.error_code)
  end
- rescue ConnectionError, UnknownMemberId, RebalanceInProgress, IllegalGeneration => e
+ rescue ConnectionError, UnknownMemberId, IllegalGeneration => e
  @logger.error "Error sending heartbeat: #{e}"
  raise HeartbeatError, e
+ rescue RebalanceInProgress => e
+ @logger.warn "Error sending heartbeat: #{e}"
+ raise HeartbeatError, e
  rescue NotCoordinatorForGroup
  @logger.error "Failed to find coordinator for group `#{@group_id}`; retrying..."
  sleep 1
@@ -144,6 +151,8 @@ module Kafka
  rebalance_timeout: @rebalance_timeout,
  member_id: @member_id,
  topics: @topics,
+ protocol_name: @assignor.protocol_name,
+ user_data: @assignor.user_data,
  )

  Protocol.handle_error(response.error_code)
@@ -180,9 +189,14 @@ module Kafka
  if group_leader?
  @logger.info "Chosen as leader of group `#{@group_id}`"

- group_assignment = @assignment_strategy.assign(
- members: @members.keys,
- topics: @topics,
+ topics = Set.new
+ @members.each do |_member, metadata|
+ metadata.topics.each { |t| topics.add(t) }
+ end
+
+ group_assignment = @assignor.assign(
+ members: @members,
+ topics: topics,
  )
  end

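
A sketch of selecting the strategy when building a consumer, assuming the new assignment_strategy: keyword is threaded through Kafka::Client#consumer (omitting it keeps the round robin default):

kafka = Kafka.new(["broker1:9092"], client_id: "my-app")

consumer = kafka.consumer(
  group_id: "my-group",
  assignment_strategy: EvenSpreadStrategy.new  # hypothetical strategy from the sketch above
)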

data/lib/kafka/crc32_hash.rb ADDED
@@ -0,0 +1,15 @@
+ # frozen_string_literal: true
+
+ require "zlib"
+
+ module Kafka
+ class Crc32Hash
+
+ # crc32 is supported natively
+ def load; end
+
+ def hash(value)
+ Zlib.crc32(value)
+ end
+ end
+ end

data/lib/kafka/digest.rb ADDED
@@ -0,0 +1,22 @@
+ # frozen_string_literal: true
+
+ require "kafka/crc32_hash"
+ require "kafka/murmur2_hash"
+
+ module Kafka
+ module Digest
+ FUNCTIONS_BY_NAME = {
+ :crc32 => Crc32Hash.new,
+ :murmur2 => Murmur2Hash.new
+ }.freeze
+
+ def self.find_digest(name)
+ digest = FUNCTIONS_BY_NAME.fetch(name) do
+ raise LoadError, "Unknown hash function #{name}"
+ end
+
+ digest.load
+ digest
+ end
+ end
+ end
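
What the lookup above does in practice (key is illustrative):

Kafka::Digest.find_digest(:crc32).hash("some-key")   # => Zlib.crc32("some-key")
Kafka::Digest.find_digest(:murmur2).hash("some-key") # loads digest-murmurhash first, see below
Kafka::Digest.find_digest(:sha1)                     # raises LoadError: Unknown hash function sha1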

data/lib/kafka/murmur2_hash.rb ADDED
@@ -0,0 +1,17 @@
+ # frozen_string_literal: true
+
+ module Kafka
+ class Murmur2Hash
+ SEED = [0x9747b28c].pack('L')
+
+ def load
+ require 'digest/murmurhash'
+ rescue LoadError
+ raise LoadError, "using murmur2 hashing requires adding a dependency on the `digest-murmurhash` gem to your Gemfile."
+ end
+
+ def hash(value)
+ ::Digest::MurmurHash2.rawdigest(value, SEED) & 0x7fffffff
+ end
+ end
+ end
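
As the LoadError above says, murmur2 hashing is opt-in and needs an extra dependency in the application's Gemfile:

# Gemfile
gem "digest-murmurhash"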

data/lib/kafka/partitioner.rb CHANGED
@@ -1,11 +1,16 @@
  # frozen_string_literal: true

- require "zlib"
+ require "kafka/digest"

  module Kafka

  # Assigns partitions to messages.
  class Partitioner
+ # @param hash_function [Symbol, nil] the algorithm used to compute a message's
+ # destination partition. Default is :crc32
+ def initialize(hash_function: nil)
+ @digest = Digest.find_digest(hash_function || :crc32)
+ end

  # Assigns a partition number based on a partition key. If no explicit
  # partition key is provided, the message key will be used instead.
@@ -28,7 +33,7 @@ module Kafka
  if key.nil?
  rand(partition_count)
  else
- Zlib.crc32(key) % partition_count
+ @digest.hash(key) % partition_count
  end
  end
  end
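
A sketch of switching a producer to murmur2 partitioning (key-compatible with the Java client's default partitioner), assuming the client still accepts a custom partitioner via the partitioner: option:

partitioner = Kafka::Partitioner.new(hash_function: :murmur2)  # needs digest-murmurhash
kafka = Kafka.new(["broker1:9092"], client_id: "my-app", partitioner: partitioner)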

data/lib/kafka/protocol/add_offsets_to_txn_response.rb CHANGED
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  module Kafka
  module Protocol
  class AddOffsetsToTxnResponse

data/lib/kafka/protocol/encoder.rb CHANGED
@@ -126,7 +126,7 @@ module Kafka
  # Writes an integer under varints serializing to the IO object.
  # https://developers.google.com/protocol-buffers/docs/encoding#varints
  #
- # @param string [Integer]
+ # @param int [Integer]
  # @return [nil]
  def write_varint(int)
  int = int << 1
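
For reference, an illustrative (not the gem's) Ruby version of the zigzag varint scheme that write_varint implements:

def varint_bytes(int)
  int = int << 1
  int = ~int | 1 if int < 0        # zigzag: map negatives onto odd positive values
  bytes = []
  while int & ~0x7f != 0
    bytes << (int & 0x7f | 0x80)   # continuation bit set on every byte but the last
    int >>= 7
  end
  bytes << int
  bytes.pack("C*")
end

varint_bytes(1)   # => "\x02"
varint_bytes(-1)  # => "\x01"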

data/lib/kafka/protocol/join_group_request.rb CHANGED
@@ -7,14 +7,14 @@ module Kafka
  class JoinGroupRequest
  PROTOCOL_TYPE = "consumer"

- def initialize(group_id:, session_timeout:, rebalance_timeout:, member_id:, topics: [])
+ def initialize(group_id:, session_timeout:, rebalance_timeout:, member_id:, topics: [], protocol_name:, user_data: nil)
  @group_id = group_id
  @session_timeout = session_timeout * 1000 # Kafka wants ms.
  @rebalance_timeout = rebalance_timeout * 1000 # Kafka wants ms.
  @member_id = member_id || ""
  @protocol_type = PROTOCOL_TYPE
  @group_protocols = {
- "roundrobin" => ConsumerGroupProtocol.new(topics: topics),
+ protocol_name => ConsumerGroupProtocol.new(topics: topics, user_data: user_data),
  }
  end


data/lib/kafka/protocol/join_group_response.rb CHANGED
@@ -3,6 +3,8 @@
  module Kafka
  module Protocol
  class JoinGroupResponse
+ Metadata = Struct.new(:version, :topics, :user_data)
+
  attr_reader :error_code

  attr_reader :generation_id, :group_protocol
@@ -25,7 +27,13 @@ module Kafka
  group_protocol: decoder.string,
  leader_id: decoder.string,
  member_id: decoder.string,
- members: Hash[decoder.array { [decoder.string, decoder.bytes] }],
+ members: Hash[
+ decoder.array do
+ member_id = decoder.string
+ d = Decoder.from_string(decoder.bytes)
+ [member_id, Metadata.new(d.int16, d.array { d.string }, d.bytes)]
+ end
+ ],
  )
  end
  end
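
The member metadata blob decoded above is an int16 version, an array of topic names, and optional user data. An illustrative round trip, assuming the usual Encoder helpers (write_int16, write_array, write_string, write_bytes):

require "stringio"

buffer = StringIO.new
encoder = Kafka::Protocol::Encoder.new(buffer)
encoder.write_int16(0)                                   # metadata version
encoder.write_array(["some-topic"]) { |t| encoder.write_string(t) }
encoder.write_bytes(nil)                                 # user_data is optional

d = Kafka::Protocol::Decoder.from_string(buffer.string)
Kafka::Protocol::JoinGroupResponse::Metadata.new(d.int16, d.array { d.string }, d.bytes)
# => #<struct version=0, topics=["some-topic"], user_data=nil>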

data/lib/kafka/protocol/record_batch.rb CHANGED
@@ -77,7 +77,7 @@ module Kafka
  record_batch_encoder.write_int8(MAGIC_BYTE)

  body = encode_record_batch_body
- crc = Digest::CRC32c.checksum(body)
+ crc = ::Digest::CRC32c.checksum(body)

  record_batch_encoder.write_int32(crc)
  record_batch_encoder.write(body)
@@ -213,7 +213,7 @@ module Kafka
  end

  def mark_control_record
- if in_transaction && is_control_batch
+ if is_control_batch
  record = @records.first
  record.is_control_record = true unless record.nil?
  end

data/lib/kafka/protocol/sasl_handshake_request.rb CHANGED
@@ -8,7 +8,7 @@ module Kafka

  class SaslHandshakeRequest

- SUPPORTED_MECHANISMS = %w(GSSAPI PLAIN SCRAM-SHA-256 SCRAM-SHA-512 OAUTHBEARER)
+ SUPPORTED_MECHANISMS = %w(AWS_MSK_IAM GSSAPI PLAIN SCRAM-SHA-256 SCRAM-SHA-512 OAUTHBEARER)

  def initialize(mechanism)
  unless SUPPORTED_MECHANISMS.include?(mechanism)

data/lib/kafka/protocol/sync_group_response.rb CHANGED
@@ -13,9 +13,12 @@ module Kafka
  end

  def self.decode(decoder)
+ error_code = decoder.int16
+ member_assignment_bytes = decoder.bytes
+
  new(
- error_code: decoder.int16,
- member_assignment: MemberAssignment.decode(Decoder.from_string(decoder.bytes)),
+ error_code: error_code,
+ member_assignment: member_assignment_bytes ? MemberAssignment.decode(Decoder.from_string(member_assignment_bytes)) : nil
  )
  end
  end

data/lib/kafka/protocol/txn_offset_commit_response.rb CHANGED
@@ -1,17 +1,46 @@
+ # frozen_string_literal: true
+
  module Kafka
  module Protocol
  class TxnOffsetCommitResponse
+ class PartitionError
+ attr_reader :partition, :error_code
+
+ def initialize(partition:, error_code:)
+ @partition = partition
+ @error_code = error_code
+ end
+ end
+
+ class TopicPartitionsError
+ attr_reader :topic, :partitions
+
+ def initialize(topic:, partitions:)
+ @topic = topic
+ @partitions = partitions
+ end
+ end

- attr_reader :error_code
+ attr_reader :errors

- def initialize(error_code:)
- @error_code = error_code
+ def initialize(errors:)
+ @errors = errors
  end

  def self.decode(decoder)
  _throttle_time_ms = decoder.int32
- error_code = decoder.int16
- new(error_code: error_code)
+ errors = decoder.array do
+ TopicPartitionsError.new(
+ topic: decoder.string,
+ partitions: decoder.array do
+ PartitionError.new(
+ partition: decoder.int32,
+ error_code: decoder.int16
+ )
+ end
+ )
+ end
+ new(errors: errors)
  end
  end
  end

data/lib/kafka/round_robin_assignment_strategy.rb CHANGED
@@ -1,54 +1,52 @@
- # frozen_string_literal: true
-
- require "kafka/protocol/member_assignment"
-
  module Kafka

- # A consumer group partition assignment strategy that assigns partitions to
- # consumers in a round-robin fashion.
+ # A round robin assignment strategy inspired by the
+ # original Java client round robin assignor. It's capable
+ # of handling identical as well as different topic subscriptions
+ # across the same consumer group.
  class RoundRobinAssignmentStrategy
- def initialize(cluster:)
- @cluster = cluster
+ def protocol_name
+ "roundrobin"
  end

  # Assign the topic partitions to the group members.
  #
- # @param members [Array<String>] member ids
- # @param topics [Array<String>] topics
- # @return [Hash<String, Protocol::MemberAssignment>] a hash mapping member
- # ids to assignments.
- def assign(members:, topics:)
- group_assignment = {}
-
- members.each do |member_id|
- group_assignment[member_id] = Protocol::MemberAssignment.new
- end
-
- topic_partitions = topics.flat_map do |topic|
- begin
- partitions = @cluster.partitions_for(topic).map(&:partition_id)
- rescue UnknownTopicOrPartition
- raise UnknownTopicOrPartition, "unknown topic #{topic}"
+ # @param cluster [Kafka::Cluster]
+ # @param members [Hash<String, Kafka::Protocol::JoinGroupResponse::Metadata>] a hash
+ # mapping member ids to metadata
+ # @param partitions [Array<Kafka::ConsumerGroup::Assignor::Partition>] a list of
+ # partitions the consumer group processes
+ # @return [Hash<String, Array<Kafka::ConsumerGroup::Assignor::Partition>>] a hash
+ # mapping member ids to partitions.
+ def call(cluster:, members:, partitions:)
+ partitions_per_member = Hash.new {|h, k| h[k] = [] }
+ relevant_partitions = valid_sorted_partitions(members, partitions)
+ members_ids = members.keys
+ iterator = (0...members.size).cycle
+ idx = iterator.next
+
+ relevant_partitions.each do |partition|
+ topic = partition.topic
+
+ while !members[members_ids[idx]].topics.include?(topic)
+ idx = iterator.next
  end
- Array.new(partitions.count) { topic }.zip(partitions)
+
+ partitions_per_member[members_ids[idx]] << partition
+ idx = iterator.next
  end

- partitions_per_member = topic_partitions.group_by.with_index do |_, index|
- index % members.count
- end.values
+ partitions_per_member
+ end

- members.zip(partitions_per_member).each do |member_id, member_partitions|
- unless member_partitions.nil?
- member_partitions.each do |topic, partition|
- group_assignment[member_id].assign(topic, [partition])
- end
- end
- end
+ def valid_sorted_partitions(members, partitions)
+ subscribed_topics = members.map do |id, metadata|
+ metadata && metadata.topics
+ end.flatten.compact

- group_assignment
- rescue Kafka::LeaderNotAvailable
- sleep 1
- retry
+ partitions
+ .select { |partition| subscribed_topics.include?(partition.topic) }
+ .sort_by { |partition| partition.topic }
  end
  end
  end
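
A worked example of the new #call interface with mixed subscriptions (names are illustrative; this strategy never consults the cluster argument):

Metadata  = Kafka::Protocol::JoinGroupResponse::Metadata
Partition = Kafka::ConsumerGroup::Assignor::Partition

members = {
  "consumer-1" => Metadata.new(0, ["orders"], nil),
  "consumer-2" => Metadata.new(0, ["orders", "payments"], nil),
}
partitions = [
  Partition.new("orders", 0),
  Partition.new("orders", 1),
  Partition.new("payments", 0),
]

Kafka::RoundRobinAssignmentStrategy.new.call(cluster: nil, members: members, partitions: partitions)
# => consumer-1 gets one "orders" partition; consumer-2 gets the other plus "payments"/0,
#    since only consumer-2 subscribes to "payments"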