ruby-kafka 1.1.0.beta1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+require "zlib"
+
+module Kafka
+  class Crc32Hash
+
+    # crc32 is supported natively
+    def load; end
+
+    def hash(value)
+      Zlib.crc32(value)
+    end
+  end
+end
data/lib/kafka/datadog.rb CHANGED
@@ -31,7 +31,7 @@ module Kafka
 
     class << self
       def statsd
-        @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags)
+        @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags, socket_path: socket_path)
       end
 
       def statsd=(statsd)
@@ -57,6 +57,15 @@ module Kafka
         clear
       end
 
+      def socket_path
+        @socket_path
+      end
+
+      def socket_path=(socket_path)
+        @socket_path = socket_path
+        clear
+      end
+
       def namespace
         @namespace ||= STATSD_NAMESPACE
       end
@@ -87,8 +96,8 @@ module Kafka
       private
 
       %w[increment histogram count timing gauge].each do |type|
-        define_method(type) do |*args|
-          emit(type, *args)
+        define_method(type) do |*args, **kwargs|
+          emit(type, *args, **kwargs)
         end
       end
 
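
The new socket_path setting lets the Datadog reporter talk to the agent over a Unix domain socket instead of UDP host/port. A minimal usage sketch (not part of the diff; the socket path and namespace below are placeholder values):

require "kafka/datadog"

# Route metrics through the agent's Unix domain socket instead of host/port.
Kafka::Datadog.socket_path = "/var/run/datadog/dsd.socket"
Kafka::Datadog.namespace = "my_app.kafka"
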
@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+
+require "kafka/crc32_hash"
+require "kafka/murmur2_hash"
+
+module Kafka
+  module Digest
+    FUNCTIONS_BY_NAME = {
+      :crc32 => Crc32Hash.new,
+      :murmur2 => Murmur2Hash.new
+    }.freeze
+
+    def self.find_digest(name)
+      digest = FUNCTIONS_BY_NAME.fetch(name) do
+        raise LoadError, "Unknown hash function #{name}"
+      end
+
+      digest.load
+      digest
+    end
+  end
+end
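
The new Kafka::Digest module resolves a hashing implementation by name and fails fast for unknown names; :murmur2 additionally needs the digest-murmurhash gem at load time. A minimal sketch (not part of the diff):

require "kafka/digest"

digest = Kafka::Digest.find_digest(:crc32)
digest.hash("some-key")            # => CRC32 checksum as an Integer

Kafka::Digest.find_digest(:sha256) # => raises LoadError: Unknown hash function sha256
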
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module Kafka
+  # Holds a list of interceptors that implement `call`
+  # and wraps calls to a chain of custom interceptors.
+  class Interceptors
+    def initialize(interceptors:, logger:)
+      @interceptors = interceptors || []
+      @logger = TaggedLogger.new(logger)
+    end
+
+    # This method is called when the client produces a message or once the batches are fetched.
+    # The message returned from the first call is passed to the second interceptor call, and so on in an
+    # interceptor chain. This method does not throw exceptions.
+    #
+    # @param intercepted [Kafka::PendingMessage || Kafka::FetchedBatch] the produced message or
+    #   fetched batch.
+    #
+    # @return [Kafka::PendingMessage || Kafka::FetchedBatch] the intercepted message or batch
+    #   returned by the last interceptor.
+    def call(intercepted)
+      @interceptors.each do |interceptor|
+        begin
+          intercepted = interceptor.call(intercepted)
+        rescue Exception => e
+          @logger.warn "Error executing interceptor for topic: #{intercepted.topic} partition: #{intercepted.partition}: #{e.message}\n#{e.backtrace.join("\n")}"
+        end
+      end
+
+      intercepted
+    end
+  end
+end
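
Any object responding to call that returns the (possibly modified) message or batch can sit in the chain. A minimal sketch of a producer-side interceptor (not part of the diff; the class name is made up for illustration):

# Counts messages as they pass through the chain and returns them unchanged,
# so later interceptors and the producer still receive the message.
class MessageCountingInterceptor
  attr_reader :count

  def initialize
    @count = 0
  end

  def call(message)
    @count += 1
    message
  end
end
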
@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+module Kafka
+  class Murmur2Hash
+    SEED = [0x9747b28c].pack('L')
+
+    def load
+      require 'digest/murmurhash'
+    rescue LoadError
+      raise LoadError, "using murmur2 hashing requires adding a dependency on the `digest-murmurhash` gem to your Gemfile."
+    end
+
+    def hash(value)
+      ::Digest::MurmurHash2.rawdigest(value, SEED) & 0x7fffffff
+    end
+  end
+end
@@ -50,9 +50,20 @@ module Kafka
     # @param offset [Integer] the offset of the message that should be marked as processed.
     # @return [nil]
     def mark_as_processed(topic, partition, offset)
-      @uncommitted_offsets += 1
+      unless @group.assigned_to?(topic, partition)
+        @logger.debug "Not marking #{topic}/#{partition}:#{offset} as processed for partition not assigned to this consumer."
+        return
+      end
       @processed_offsets[topic] ||= {}
 
+      last_processed_offset = @processed_offsets[topic][partition] || -1
+      if last_processed_offset > offset + 1
+        @logger.debug "Not overwriting newer offset #{topic}/#{partition}:#{last_processed_offset - 1} with older #{offset}"
+        return
+      end
+
+      @uncommitted_offsets += 1
+
       # The committed offset should always be the offset of the next message that the
       # application will read, thus adding one to the last message processed.
       @processed_offsets[topic][partition] = offset + 1
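
A toy re-implementation of the new guard semantics (not the gem's code): marking offset N records N + 1 as the next offset to commit, and a stale mark can no longer overwrite a newer one.

processed = Hash.new { |h, k| h[k] = {} }

mark = lambda do |topic, partition, offset|
  last = processed[topic][partition] || -1
  next if last > offset + 1          # ignore marks older than what we already recorded
  processed[topic][partition] = offset + 1
end

mark.call("events", 0, 41)
mark.call("events", 0, 40)           # ignored: 42 > 40 + 1
processed                            # => {"events"=>{0=>42}}
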
@@ -1,11 +1,16 @@
 # frozen_string_literal: true
 
-require "zlib"
+require "kafka/digest"
 
 module Kafka
 
   # Assigns partitions to messages.
   class Partitioner
+    # @param hash_function [Symbol, nil] the algorithm used to compute a message's
+    #   destination partition. Default is :crc32
+    def initialize(hash_function: nil)
+      @digest = Digest.find_digest(hash_function || :crc32)
+    end
 
     # Assigns a partition number based on a partition key. If no explicit
     # partition key is provided, the message key will be used instead.
@@ -19,7 +24,7 @@ module Kafka
     # @param message [Kafka::PendingMessage] the message that should be assigned
     #   a partition.
     # @return [Integer] the partition number.
-    def self.partition_for_key(partition_count, message)
+    def call(partition_count, message)
       raise ArgumentError if partition_count == 0
 
       # If no explicit partition key is specified we use the message key instead.
@@ -28,7 +33,7 @@ module Kafka
       if key.nil?
         rand(partition_count)
       else
-        Zlib.crc32(key) % partition_count
+        @digest.hash(key) % partition_count
       end
     end
   end
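
With the partitioner now an instance that responds to call, an alternative hash function can be configured when building the client. A minimal sketch (not part of the diff): it assumes the digest-murmurhash gem is installed for :murmur2 and that the client's partitioner: option, added alongside this change, is used to plug it in; the broker address and client_id are placeholders.

require "kafka"

partitioner = Kafka::Partitioner.new(hash_function: :murmur2)

kafka = Kafka.new(["localhost:9092"], client_id: "my-app", partitioner: partitioner)
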
@@ -7,6 +7,7 @@ require "kafka/produce_operation"
 require "kafka/pending_message_queue"
 require "kafka/pending_message"
 require "kafka/compressor"
+require "kafka/interceptors"
 
 module Kafka
   # Allows sending messages to a Kafka cluster.
@@ -129,7 +130,9 @@ module Kafka
   class Producer
     class AbortTransaction < StandardError; end
 
-    def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
+    def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:,
+                   required_acks:, max_retries:, retry_backoff:, max_buffer_size:,
+                   max_buffer_bytesize:, partitioner:, interceptors: [])
       @cluster = cluster
       @transaction_manager = transaction_manager
       @logger = TaggedLogger.new(logger)
@@ -141,6 +144,8 @@ module Kafka
       @max_buffer_size = max_buffer_size
       @max_buffer_bytesize = max_buffer_bytesize
       @compressor = compressor
+      @partitioner = partitioner
+      @interceptors = Interceptors.new(interceptors: interceptors, logger: logger)
 
       # The set of topics that are produced to.
       @target_topics = Set.new
@@ -191,7 +196,7 @@ module Kafka
       # We want to fail fast if `topic` isn't a String
       topic = topic.to_str
 
-      message = PendingMessage.new(
+      message = @interceptors.call(PendingMessage.new(
        value: value && value.to_s,
        key: key && key.to_s,
        headers: headers,
@@ -199,7 +204,7 @@ module Kafka
        partition: partition && Integer(partition),
        partition_key: partition_key && partition_key.to_s,
        create_time: create_time
-      )
+      ))
 
       if buffer_size >= @max_buffer_size
         buffer_overflow topic,
@@ -455,7 +460,7 @@ module Kafka
 
         if partition.nil?
           partition_count = @cluster.partitions_for(message.topic).count
-          partition = Partitioner.partition_for_key(partition_count, message)
+          partition = @partitioner.call(partition_count, message)
         end
 
         @buffer.write(
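
A minimal sketch of the producer-side wiring (not part of the diff), reusing the kafka client and MessageCountingInterceptor from the sketches above; it assumes the client's producer method forwards the interceptors: list shown in this hunk.

producer = kafka.producer(interceptors: [MessageCountingInterceptor.new])

producer.produce("hello", topic: "greetings", partition_key: "user-1")
producer.deliver_messages   # each produced message passed through the interceptor chain before buffering
producer.shutdown
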
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 module Kafka
   module Protocol
     class AddOffsetsToTxnResponse
@@ -126,7 +126,7 @@ module Kafka
     # Writes an integer under varints serializing to the IO object.
     # https://developers.google.com/protocol-buffers/docs/encoding#varints
     #
-    # @param string [Integer]
+    # @param int [Integer]
     # @return [nil]
     def write_varint(int)
       int = int << 1
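
For reference, Kafka's varints use the standard zigzag mapping that the shift above begins, so small negative and positive integers both encode to small unsigned values before base-128 serialization. A worked illustration (not the gem's code):

def zigzag(n)
  (n << 1) ^ (n >> 63)   # 64-bit zigzag: 0, -1, 1, -2, 2, ... => 0, 1, 2, 3, 4, ...
end

zigzag(0)   # => 0
zigzag(-1)  # => 1
zigzag(1)   # => 2
zigzag(-3)  # => 5
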
@@ -7,14 +7,14 @@ module Kafka
     class JoinGroupRequest
       PROTOCOL_TYPE = "consumer"
 
-      def initialize(group_id:, session_timeout:, rebalance_timeout:, member_id:, topics: [])
+      def initialize(group_id:, session_timeout:, rebalance_timeout:, member_id:, topics: [], protocol_name:, user_data: nil)
         @group_id = group_id
         @session_timeout = session_timeout * 1000 # Kafka wants ms.
         @rebalance_timeout = rebalance_timeout * 1000 # Kafka wants ms.
         @member_id = member_id || ""
         @protocol_type = PROTOCOL_TYPE
         @group_protocols = {
-          "roundrobin" => ConsumerGroupProtocol.new(topics: topics),
+          protocol_name => ConsumerGroupProtocol.new(topics: topics, user_data: user_data),
         }
       end
 
@@ -3,6 +3,8 @@
 module Kafka
   module Protocol
     class JoinGroupResponse
+      Metadata = Struct.new(:version, :topics, :user_data)
+
       attr_reader :error_code
 
       attr_reader :generation_id, :group_protocol
@@ -25,7 +27,13 @@ module Kafka
         group_protocol: decoder.string,
         leader_id: decoder.string,
         member_id: decoder.string,
-        members: Hash[decoder.array { [decoder.string, decoder.bytes] }],
+        members: Hash[
+          decoder.array do
+            member_id = decoder.string
+            d = Decoder.from_string(decoder.bytes)
+            [member_id, Metadata.new(d.int16, d.array { d.string }, d.bytes)]
+          end
+        ],
       )
     end
   end
@@ -77,7 +77,7 @@ module Kafka
        record_batch_encoder.write_int8(MAGIC_BYTE)
 
        body = encode_record_batch_body
-        crc = Digest::CRC32c.checksum(body)
+        crc = ::Digest::CRC32c.checksum(body)
 
        record_batch_encoder.write_int32(crc)
        record_batch_encoder.write(body)
@@ -213,7 +213,7 @@ module Kafka
      end
 
      def mark_control_record
-        if is_control_batch
+        if in_transaction && is_control_batch
          record = @records.first
          record.is_control_record = true unless record.nil?
        end
@@ -13,9 +13,12 @@ module Kafka
       end
 
       def self.decode(decoder)
+        error_code = decoder.int16
+        member_assignment_bytes = decoder.bytes
+
         new(
-          error_code: decoder.int16,
-          member_assignment: MemberAssignment.decode(Decoder.from_string(decoder.bytes)),
+          error_code: error_code,
+          member_assignment: member_assignment_bytes ? MemberAssignment.decode(Decoder.from_string(member_assignment_bytes)) : nil
         )
       end
     end
@@ -1,17 +1,46 @@
+# frozen_string_literal: true
+
 module Kafka
   module Protocol
     class TxnOffsetCommitResponse
+      class PartitionError
+        attr_reader :partition, :error_code
+
+        def initialize(partition:, error_code:)
+          @partition = partition
+          @error_code = error_code
+        end
+      end
+
+      class TopicPartitionsError
+        attr_reader :topic, :partitions
+
+        def initialize(topic:, partitions:)
+          @topic = topic
+          @partitions = partitions
+        end
+      end
 
-      attr_reader :error_code
+      attr_reader :errors
 
-      def initialize(error_code:)
-        @error_code = error_code
+      def initialize(errors:)
+        @errors = errors
       end
 
       def self.decode(decoder)
         _throttle_time_ms = decoder.int32
-        error_code = decoder.int16
-        new(error_code: error_code)
+        errors = decoder.array do
+          TopicPartitionsError.new(
+            topic: decoder.string,
+            partitions: decoder.array do
+              PartitionError.new(
+                partition: decoder.int32,
+                error_code: decoder.int16
+              )
+            end
+          )
+        end
+        new(errors: errors)
       end
     end
   end
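
A minimal sketch (not part of the diff) of walking the response's new per-topic, per-partition error structure; Kafka::Protocol.handle_error raises the mapped error class for non-zero codes.

response = Kafka::Protocol::TxnOffsetCommitResponse.new(
  errors: [
    Kafka::Protocol::TxnOffsetCommitResponse::TopicPartitionsError.new(
      topic: "events",
      partitions: [
        Kafka::Protocol::TxnOffsetCommitResponse::PartitionError.new(partition: 0, error_code: 0)
      ]
    )
  ]
)

response.errors.each do |topic_errors|
  topic_errors.partitions.each do |partition_error|
    Kafka::Protocol.handle_error(partition_error.error_code)  # no-op for error_code 0
  end
end
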
@@ -1,54 +1,52 @@
-# frozen_string_literal: true
-
-require "kafka/protocol/member_assignment"
-
 module Kafka
 
-  # A consumer group partition assignment strategy that assigns partitions to
-  # consumers in a round-robin fashion.
+  # A round robin assignment strategy inspired by the
+  # original Java client's round robin assignor. It's capable
+  # of handling identical as well as different topic subscriptions
+  # across the same consumer group.
   class RoundRobinAssignmentStrategy
-    def initialize(cluster:)
-      @cluster = cluster
+    def protocol_name
+      "roundrobin"
     end
 
     # Assign the topic partitions to the group members.
     #
-    # @param members [Array<String>] member ids
-    # @param topics [Array<String>] topics
-    # @return [Hash<String, Protocol::MemberAssignment>] a hash mapping member
-    #   ids to assignments.
-    def assign(members:, topics:)
-      group_assignment = {}
-
-      members.each do |member_id|
-        group_assignment[member_id] = Protocol::MemberAssignment.new
-      end
-
-      topic_partitions = topics.flat_map do |topic|
-        begin
-          partitions = @cluster.partitions_for(topic).map(&:partition_id)
-        rescue UnknownTopicOrPartition
-          raise UnknownTopicOrPartition, "unknown topic #{topic}"
+    # @param cluster [Kafka::Cluster]
+    # @param members [Hash<String, Kafka::Protocol::JoinGroupResponse::Metadata>] a hash
+    #   mapping member ids to metadata
+    # @param partitions [Array<Kafka::ConsumerGroup::Assignor::Partition>] a list of
+    #   partitions the consumer group processes
+    # @return [Hash<String, Array<Kafka::ConsumerGroup::Assignor::Partition>>] a hash
+    #   mapping member ids to partitions.
+    def call(cluster:, members:, partitions:)
+      partitions_per_member = Hash.new {|h, k| h[k] = [] }
+      relevant_partitions = valid_sorted_partitions(members, partitions)
+      members_ids = members.keys
+      iterator = (0...members.size).cycle
+      idx = iterator.next
+
+      relevant_partitions.each do |partition|
+        topic = partition.topic
+
+        while !members[members_ids[idx]].topics.include?(topic)
+          idx = iterator.next
         end
-        Array.new(partitions.count) { topic }.zip(partitions)
+
+        partitions_per_member[members_ids[idx]] << partition
+        idx = iterator.next
       end
 
-      partitions_per_member = topic_partitions.group_by.with_index do |_, index|
-        index % members.count
-      end.values
+      partitions_per_member
+    end
 
-      members.zip(partitions_per_member).each do |member_id, member_partitions|
-        unless member_partitions.nil?
-          member_partitions.each do |topic, partition|
-            group_assignment[member_id].assign(topic, [partition])
-          end
-        end
-      end
+    def valid_sorted_partitions(members, partitions)
+      subscribed_topics = members.map do |id, metadata|
+        metadata && metadata.topics
+      end.flatten.compact
 
-      group_assignment
-    rescue Kafka::LeaderNotAvailable
-      sleep 1
-      retry
+      partitions
+        .select { |partition| subscribed_topics.include?(partition.topic) }
+        .sort_by { |partition| partition.topic }
     end
   end
 end
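
A minimal sketch (not part of the diff) of the new assignor interface: members map member ids to JoinGroupResponse::Metadata carrying each member's subscribed topics, and partitions expose at least #topic. The structs below stand in for the real protocol and assignor objects.

require "kafka"

Metadata  = Struct.new(:version, :topics, :user_data)
Partition = Struct.new(:topic, :partition_id)

strategy = Kafka::RoundRobinAssignmentStrategy.new

strategy.call(
  cluster: nil,  # not used by this strategy
  members: {
    "consumer-a" => Metadata.new(0, ["events"], nil),
    "consumer-b" => Metadata.new(0, ["events", "logs"], nil)
  },
  partitions: [
    Partition.new("events", 0),
    Partition.new("events", 1),
    Partition.new("logs", 0)
  ]
)
# => { "consumer-a" => [events/0], "consumer-b" => [events/1, logs/0] }
#    round robin over subscribing members only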