ruby-kafka 1.1.0.beta1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +111 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +21 -0
- data/README.md +141 -0
- data/lib/kafka/async_producer.rb +57 -42
- data/lib/kafka/client.rb +41 -7
- data/lib/kafka/cluster.rb +30 -24
- data/lib/kafka/consumer.rb +8 -3
- data/lib/kafka/consumer_group/assignor.rb +63 -0
- data/lib/kafka/consumer_group.rb +26 -6
- data/lib/kafka/crc32_hash.rb +15 -0
- data/lib/kafka/datadog.rb +12 -3
- data/lib/kafka/digest.rb +22 -0
- data/lib/kafka/interceptors.rb +33 -0
- data/lib/kafka/murmur2_hash.rb +17 -0
- data/lib/kafka/offset_manager.rb +12 -1
- data/lib/kafka/partitioner.rb +8 -3
- data/lib/kafka/producer.rb +9 -4
- data/lib/kafka/protocol/add_offsets_to_txn_response.rb +2 -0
- data/lib/kafka/protocol/encoder.rb +1 -1
- data/lib/kafka/protocol/join_group_request.rb +2 -2
- data/lib/kafka/protocol/join_group_response.rb +9 -1
- data/lib/kafka/protocol/record_batch.rb +2 -2
- data/lib/kafka/protocol/sync_group_response.rb +5 -2
- data/lib/kafka/protocol/txn_offset_commit_response.rb +34 -5
- data/lib/kafka/round_robin_assignment_strategy.rb +37 -39
- data/lib/kafka/ssl_context.rb +6 -5
- data/lib/kafka/transaction_manager.rb +30 -10
- data/lib/kafka/version.rb +1 -1
- data/ruby-kafka.gemspec +2 -1
- metadata +25 -7
data/lib/kafka/datadog.rb
CHANGED
@@ -31,7 +31,7 @@ module Kafka
 
     class << self
       def statsd
-        @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags)
+        @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags, socket_path: socket_path)
       end
 
       def statsd=(statsd)
@@ -57,6 +57,15 @@ module Kafka
         clear
       end
 
+      def socket_path
+        @socket_path
+      end
+
+      def socket_path=(socket_path)
+        @socket_path = socket_path
+        clear
+      end
+
       def namespace
         @namespace ||= STATSD_NAMESPACE
       end
@@ -87,8 +96,8 @@ module Kafka
       private
 
       %w[increment histogram count timing gauge].each do |type|
-        define_method(type) do |*args|
-          emit(type, *args)
+        define_method(type) do |*args, **kwargs|
+          emit(type, *args, **kwargs)
        end
      end
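The new `socket_path` accessors let the Datadog reporter emit metrics over the Agent's DogStatsD Unix domain socket instead of the UDP host/port transport. A minimal sketch; the path below is an example, use whatever socket your Agent exposes:

require "kafka/datadog"

# Route ruby-kafka metrics through the local Agent's Unix domain socket.
Kafka::Datadog.socket_path = "/var/run/datadog/dsd.socket"  # example path

# Host/port remain the default transport when no socket path is set:
# Kafka::Datadog.host = "statsd.example.com"
# Kafka::Datadog.port = 8125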
data/lib/kafka/digest.rb
ADDED
@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+
+require "kafka/crc32_hash"
+require "kafka/murmur2_hash"
+
+module Kafka
+  module Digest
+    FUNCTIONS_BY_NAME = {
+      :crc32 => Crc32Hash.new,
+      :murmur2 => Murmur2Hash.new
+    }.freeze
+
+    def self.find_digest(name)
+      digest = FUNCTIONS_BY_NAME.fetch(name) do
+        raise LoadError, "Unknown hash function #{name}"
+      end
+
+      digest.load
+      digest
+    end
+  end
+end
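`find_digest` is a plain registry lookup: it fetches the hashing object by symbol, eagerly calls `load` so a missing optional dependency fails at configuration time rather than on the first produced message, and returns the digest. A quick sketch of the three outcomes (`:crc32` is the default and, per the `kafka/crc32_hash.rb` file this release adds but the diff view does not display, presumably Zlib-based):

require "kafka/digest"

digest = Kafka::Digest.find_digest(:crc32)  # always available
digest.hash("some-key")                     # => Integer used for partition selection

Kafka::Digest.find_digest(:murmur2)         # raises LoadError unless the
                                            # digest-murmurhash gem is installed
Kafka::Digest.find_digest(:md5)             # => LoadError: Unknown hash function md5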
data/lib/kafka/interceptors.rb
ADDED
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module Kafka
+  # Holds a list of interceptors that implement `call`
+  # and wraps calls to a chain of custom interceptors.
+  class Interceptors
+    def initialize(interceptors:, logger:)
+      @interceptors = interceptors || []
+      @logger = TaggedLogger.new(logger)
+    end
+
+    # This method is called when the client produces a message or once the batches are fetched.
+    # The message returned from the first call is passed to the second interceptor call, and so on in an
+    # interceptor chain. This method does not throw exceptions.
+    #
+    # @param intercepted [Kafka::PendingMessage || Kafka::FetchedBatch] the produced message or
+    #   fetched batch.
+    #
+    # @return [Kafka::PendingMessage || Kafka::FetchedBatch] the intercepted message or batch
+    #   returned by the last interceptor.
+    def call(intercepted)
+      @interceptors.each do |interceptor|
+        begin
+          intercepted = interceptor.call(intercepted)
+        rescue Exception => e
+          @logger.warn "Error executing interceptor for topic: #{intercepted.topic} partition: #{intercepted.partition}: #{e.message}\n#{e.backtrace.join("\n")}"
+        end
+      end
+
+      intercepted
+    end
+  end
+end
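An interceptor is any object responding to `call` that returns the (possibly modified) message; exceptions are swallowed and logged, so a buggy interceptor degrades to a no-op instead of breaking delivery. A hypothetical producer-side interceptor (the class name and header are illustrative):

require "kafka"
require "securerandom"

# Hypothetical interceptor that stamps each outgoing message with a header.
# Producer interceptors receive a Kafka::PendingMessage and must return one.
class TracingInterceptor
  def call(message)
    Kafka::PendingMessage.new(
      value: message.value,
      key: message.key,
      headers: message.headers.merge("trace_id" => SecureRandom.uuid),
      topic: message.topic,
      partition: message.partition,
      partition_key: message.partition_key,
      create_time: message.create_time
    )
  end
end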
data/lib/kafka/murmur2_hash.rb
ADDED
@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+module Kafka
+  class Murmur2Hash
+    SEED = [0x9747b28c].pack('L')
+
+    def load
+      require 'digest/murmurhash'
+    rescue LoadError
+      raise LoadError, "using murmur2 hashing requires adding a dependency on the `digest-murmurhash` gem to your Gemfile."
+    end
+
+    def hash(value)
+      ::Digest::MurmurHash2.rawdigest(value, SEED) & 0x7fffffff
+    end
+  end
+end
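`Murmur2Hash` masks the raw murmur2 digest down to a non-negative 31-bit value, mirroring the Java client's default partitioner so that Ruby and Java producers sharing a topic route equal keys to equal partitions. The same computation by hand, assuming digest-murmurhash is in the Gemfile (the key and partition count are examples):

require "digest/murmurhash"  # provided by the digest-murmurhash gem

seed = [0x9747b28c].pack('L')  # the seed the Java client uses
key  = "order-12345"

hash = ::Digest::MurmurHash2.rawdigest(key, seed) & 0x7fffffff
partition = hash % 16  # for a hypothetical 16-partition topic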
data/lib/kafka/offset_manager.rb
CHANGED
@@ -50,9 +50,20 @@ module Kafka
     # @param offset [Integer] the offset of the message that should be marked as processed.
     # @return [nil]
     def mark_as_processed(topic, partition, offset)
-      @uncommitted_offsets += 1
+      unless @group.assigned_to?(topic, partition)
+        @logger.debug "Not marking #{topic}/#{partition}:#{offset} as processed for partition not assigned to this consumer."
+        return
+      end
       @processed_offsets[topic] ||= {}
 
+      last_processed_offset = @processed_offsets[topic][partition] || -1
+      if last_processed_offset > offset + 1
+        @logger.debug "Not overwriting newer offset #{topic}/#{partition}:#{last_processed_offset - 1} with older #{offset}"
+        return
+      end
+
+      @uncommitted_offsets += 1
+
       # The committed offset should always be the offset of the next message that the
       # application will read, thus adding one to the last message processed.
       @processed_offsets[topic][partition] = offset + 1
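The arithmetic in the new guard follows the commit convention spelled out in the comment: the stored value is always the offset of the next message to read, i.e. processed offset + 1, which is why the comparison is `last_processed_offset > offset + 1`. A small worked example (illustrative only):

# Stored offsets follow the "next offset to read" convention: after
# processing message 100 on events/0, the stored value is 101.
processed = { "events" => { 0 => 101 } }

# Re-marking an older message, e.g. offset 42 during a replay, must not
# move the commit point backwards: 101 > 42 + 1, so the write is skipped.
offset = 42
last_processed_offset = processed["events"][0] || -1
last_processed_offset > offset + 1  # => true, keep the newer offset 101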
data/lib/kafka/partitioner.rb
CHANGED
@@ -1,11 +1,16 @@
 # frozen_string_literal: true
 
-require "zlib"
+require "kafka/digest"
 
 module Kafka
 
   # Assigns partitions to messages.
   class Partitioner
+    # @param hash_function [Symbol, nil] the algorithm used to compute a messages
+    #   destination partition. Default is :crc32
+    def initialize(hash_function: nil)
+      @digest = Digest.find_digest(hash_function || :crc32)
+    end
 
     # Assigns a partition number based on a partition key. If no explicit
     # partition key is provided, the message key will be used instead.
@@ -19,7 +24,7 @@ module Kafka
     # @param message [Kafka::PendingMessage] the message that should be assigned
     #   a partition.
     # @return [Integer] the partition number.
-    def self.call(partition_count, message)
+    def call(partition_count, message)
       raise ArgumentError if partition_count == 0
 
       # If no explicit partition key is specified we use the message key instead.
@@ -28,7 +33,7 @@ module Kafka
       if key.nil?
         rand(partition_count)
       else
-        Zlib.crc32(key) % partition_count
+        @digest.hash(key) % partition_count
      end
    end
  end
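With the digest extracted behind `Kafka::Digest`, the partitioner is now an instantiated object and can be configured per client. A sketch of the wiring; the `partitioner:` option on `Kafka.new` comes from this release's client.rb and README changes, which the summary above lists but this view does not show in full:

require "kafka"

# Keys land on the same partitions the Java client would choose.
kafka = Kafka.new(
  ["kafka1:9092"],
  client_id: "my-app",
  partitioner: Kafka::Partitioner.new(hash_function: :murmur2)
)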
data/lib/kafka/producer.rb
CHANGED
@@ -7,6 +7,7 @@ require "kafka/produce_operation"
 require "kafka/pending_message_queue"
 require "kafka/pending_message"
 require "kafka/compressor"
+require "kafka/interceptors"
 
 module Kafka
   # Allows sending messages to a Kafka cluster.
@@ -129,7 +130,9 @@ module Kafka
   class Producer
     class AbortTransaction < StandardError; end
 
-    def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
+    def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:,
+                   required_acks:, max_retries:, retry_backoff:, max_buffer_size:,
+                   max_buffer_bytesize:, partitioner:, interceptors: [])
       @cluster = cluster
       @transaction_manager = transaction_manager
       @logger = TaggedLogger.new(logger)
@@ -141,6 +144,8 @@ module Kafka
       @max_buffer_size = max_buffer_size
       @max_buffer_bytesize = max_buffer_bytesize
       @compressor = compressor
+      @partitioner = partitioner
+      @interceptors = Interceptors.new(interceptors: interceptors, logger: logger)
 
       # The set of topics that are produced to.
       @target_topics = Set.new
@@ -191,7 +196,7 @@ module Kafka
       # We want to fail fast if `topic` isn't a String
       topic = topic.to_str
 
-      message = PendingMessage.new(
+      message = @interceptors.call(PendingMessage.new(
        value: value && value.to_s,
        key: key && key.to_s,
        headers: headers,
@@ -199,7 +204,7 @@ module Kafka
        partition: partition && Integer(partition),
        partition_key: partition_key && partition_key.to_s,
        create_time: create_time
-      )
+      ))
 
       if buffer_size >= @max_buffer_size
         buffer_overflow topic,
@@ -455,7 +460,7 @@ module Kafka
 
       if partition.nil?
         partition_count = @cluster.partitions_for(message.topic).count
-        partition = Partitioner.call(partition_count, message)
+        partition = @partitioner.call(partition_count, message)
       end
 
       @buffer.write(
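The producer now funnels every `produce` call through the interceptor chain before buffering, and partition selection goes through the injected `@partitioner` instance rather than a class method. Wiring in the `TracingInterceptor` sketched under interceptors.rb above would look roughly like this; the `interceptors:` option on `Client#producer` comes from this release's client.rb changes, shown only in the summary:

producer = kafka.producer(interceptors: [TracingInterceptor.new])

producer.produce("hello", topic: "greetings", key: "user-1")
producer.deliver_messages
producer.shutdown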
data/lib/kafka/protocol/encoder.rb
CHANGED
@@ -126,7 +126,7 @@ module Kafka
     # Writes an integer under varints serializing to the IO object.
     # https://developers.google.com/protocol-buffers/docs/encoding#varints
     #
-    # @param string [Integer]
+    # @param int [Integer]
     # @return [nil]
     def write_varint(int)
       int = int << 1
data/lib/kafka/protocol/join_group_request.rb
CHANGED
@@ -7,14 +7,14 @@ module Kafka
     class JoinGroupRequest
       PROTOCOL_TYPE = "consumer"
 
-      def initialize(group_id:, session_timeout:, rebalance_timeout:, member_id:, topics: [])
+      def initialize(group_id:, session_timeout:, rebalance_timeout:, member_id:, topics: [], protocol_name:, user_data: nil)
         @group_id = group_id
         @session_timeout = session_timeout * 1000 # Kafka wants ms.
         @rebalance_timeout = rebalance_timeout * 1000 # Kafka wants ms.
         @member_id = member_id || ""
         @protocol_type = PROTOCOL_TYPE
         @group_protocols = {
-          "roundrobin" => ConsumerGroupProtocol.new(topics: topics),
+          protocol_name => ConsumerGroupProtocol.new(topics: topics, user_data: user_data),
         }
       end
data/lib/kafka/protocol/join_group_response.rb
CHANGED
@@ -3,6 +3,8 @@
 module Kafka
   module Protocol
     class JoinGroupResponse
+      Metadata = Struct.new(:version, :topics, :user_data)
+
       attr_reader :error_code
 
       attr_reader :generation_id, :group_protocol
@@ -25,7 +27,13 @@ module Kafka
          group_protocol: decoder.string,
          leader_id: decoder.string,
          member_id: decoder.string,
-          members: Hash[decoder.array { [decoder.string, decoder.bytes] }],
+          members: Hash[
+            decoder.array do
+              member_id = decoder.string
+              d = Decoder.from_string(decoder.bytes)
+              [member_id, Metadata.new(d.int16, d.array { d.string }, d.bytes)]
+            end
+          ],
        )
      end
    end
data/lib/kafka/protocol/record_batch.rb
CHANGED
@@ -77,7 +77,7 @@ module Kafka
        record_batch_encoder.write_int8(MAGIC_BYTE)
 
        body = encode_record_batch_body
-        crc = Digest::CRC32c.checksum(body)
+        crc = ::Digest::CRC32c.checksum(body)
 
        record_batch_encoder.write_int32(crc)
        record_batch_encoder.write(body)
@@ -213,7 +213,7 @@ module Kafka
      end
 
      def mark_control_record
-        if in_transaction && is_control_batch
+        if is_control_batch
          record = @records.first
          record.is_control_record = true unless record.nil?
        end
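The `::Digest::CRC32c` change is a two-character fix with real consequences: once the new `Kafka::Digest` module exists, an unqualified `Digest` inside `Kafka::Protocol` resolves to `Kafka::Digest` (which defines no `CRC32c`) before Ruby's constant lookup ever reaches the top-level `Digest` from the digest-crc gem. A minimal reproduction of the lookup rule:

require "digest/crc32c"  # the digest-crc gem defines ::Digest::CRC32c

module Kafka
  module Digest; end  # added in this release

  module Protocol
    def self.broken
      Digest::CRC32c   # lexical lookup finds Kafka::Digest first => NameError
    end

    def self.fixed
      ::Digest::CRC32c # explicit top-level lookup reaches digest-crc's class
    end
  end
end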
data/lib/kafka/protocol/sync_group_response.rb
CHANGED
@@ -13,9 +13,12 @@ module Kafka
      end
 
      def self.decode(decoder)
+        error_code = decoder.int16
+        member_assignment_bytes = decoder.bytes
+
        new(
-          error_code: decoder.int16,
-          member_assignment: MemberAssignment.decode(Decoder.from_string(decoder.bytes)),
+          error_code: error_code,
+          member_assignment: member_assignment_bytes ? MemberAssignment.decode(Decoder.from_string(member_assignment_bytes)) : nil
        )
      end
    end
data/lib/kafka/protocol/txn_offset_commit_response.rb
CHANGED
@@ -1,17 +1,46 @@
+# frozen_string_literal: true
+
 module Kafka
   module Protocol
     class TxnOffsetCommitResponse
+      class PartitionError
+        attr_reader :partition, :error_code
+
+        def initialize(partition:, error_code:)
+          @partition = partition
+          @error_code = error_code
+        end
+      end
+
+      class TopicPartitionsError
+        attr_reader :topic, :partitions
+
+        def initialize(topic:, partitions:)
+          @topic = topic
+          @partitions = partitions
+        end
+      end
 
-      attr_reader :error_code
+      attr_reader :errors
 
-      def initialize(error_code:)
-        @error_code = error_code
+      def initialize(errors:)
+        @errors = errors
       end
 
       def self.decode(decoder)
         _throttle_time_ms = decoder.int32
-        error_code = decoder.int16
-        new(error_code: error_code)
+        errors = decoder.array do
+          TopicPartitionsError.new(
+            topic: decoder.string,
+            partitions: decoder.array do
+              PartitionError.new(
+                partition: decoder.int32,
+                error_code: decoder.int16
+              )
+            end
+          )
+        end
+        new(errors: errors)
       end
     end
   end
 end
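Instead of a single top-level `error_code`, the decoded response now carries per-topic, per-partition error codes, which is what a transactional producer needs to react to individual partition failures. Iterating the structure might look like this (the handling is hypothetical; error code 0 means no error in the Kafka protocol):

response.errors.each do |topic_error|
  topic_error.partitions.each do |partition_error|
    next if partition_error.error_code == 0
    warn "txn offset commit failed: #{topic_error.topic}/" \
         "#{partition_error.partition} error_code=#{partition_error.error_code}"
  end
end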
data/lib/kafka/round_robin_assignment_strategy.rb
CHANGED
@@ -1,54 +1,52 @@
-# frozen_string_literal: true
-
-require "kafka/protocol/member_assignment"
-
 module Kafka
 
-  # A consumer group partition assignment strategy that assigns partitions to
-  # members in a round-robin fashion.
+  # A round robin assignment strategy inpired on the
+  # original java client round robin assignor. It's capable
+  # of handling identical as well as different topic subscriptions
+  # accross the same consumer group.
   class RoundRobinAssignmentStrategy
-    def initialize(cluster:)
-      @cluster = cluster
+    def protocol_name
+      "roundrobin"
     end
 
     # Assign the topic partitions to the group members.
     #
-    # @param members [Array<String>] member ids
-    # @param topics [Array<String>] topics
-    # @return [Hash<String, Protocol::MemberAssignment>] a hash mapping member
-    #   ids to assignments.
-    def assign(members:, topics:)
-      group_assignment = {}
-
-      members.each do |member_id|
-        group_assignment[member_id] = Protocol::MemberAssignment.new
-      end
-
-      topic_partitions = topics.flat_map do |topic|
-        begin
-          partitions = @cluster.partitions_for(topic).map(&:partition_id)
-        rescue UnknownTopicOrPartition
-          raise UnknownTopicOrPartition, "unknown topic #{topic}"
+    # @param cluster [Kafka::Cluster]
+    # @param members [Hash<String, Kafka::Protocol::JoinGroupResponse::Metadata>] a hash
+    #   mapping member ids to metadata
+    # @param partitions [Array<Kafka::ConsumerGroup::Assignor::Partition>] a list of
+    #   partitions the consumer group processes
+    # @return [Hash<String, Array<Kafka::ConsumerGroup::Assignor::Partition>] a hash
+    #   mapping member ids to partitions.
+    def call(cluster:, members:, partitions:)
+      partitions_per_member = Hash.new {|h, k| h[k] = [] }
+      relevant_partitions = valid_sorted_partitions(members, partitions)
+      members_ids = members.keys
+      iterator = (0...members.size).cycle
+      idx = iterator.next
+
+      relevant_partitions.each do |partition|
+        topic = partition.topic
+
+        while !members[members_ids[idx]].topics.include?(topic)
+          idx = iterator.next
         end
-        Array.new(partitions.count) { topic }.zip(partitions)
+
+        partitions_per_member[members_ids[idx]] << partition
+        idx = iterator.next
       end
 
-      partitions_per_member = topic_partitions.group_by.with_index do |_, index|
-        index % members.count
-      end.values
+      partitions_per_member
+    end
 
-      members.zip(partitions_per_member).each do |member_id, member_partitions|
-        unless member_partitions.nil?
-          member_partitions.each do |topic, partition|
-            group_assignment[member_id].assign(topic, [partition])
-          end
-        end
-      end
+    def valid_sorted_partitions(members, partitions)
+      subscribed_topics = members.map do |id, metadata|
+        metadata && metadata.topics
+      end.flatten.compact
 
-      group_assignment
-    rescue Kafka::LeaderNotAvailable
-      sleep 1
-      retry
+      partitions
+        .select { |partition| subscribed_topics.include?(partition.topic) }
+        .sort_by { |partition| partition.topic }
     end
   end
 end
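The rewritten strategy implements the interface of the new `ConsumerGroup::Assignor` (`protocol_name` plus `call`), the same interface custom assignment strategies plug into. Because `call` only touches `topics` on each member's metadata and `topic` on each partition, its behavior can be sketched with stand-in structs; this assumes ruby-kafka ~> 1.4 is loaded, and `cluster:` is accepted but unused by this strategy:

require "kafka/round_robin_assignment_strategy"

Metadata  = Struct.new(:topics)
Partition = Struct.new(:topic, :partition_id)

members = {
  "consumer-a" => Metadata.new(["orders", "refunds"]),
  "consumer-b" => Metadata.new(["orders"]),  # subscriptions may differ per member
}
partitions = [
  Partition.new("orders", 0), Partition.new("orders", 1),
  Partition.new("refunds", 0),
]

strategy = Kafka::RoundRobinAssignmentStrategy.new
strategy.call(cluster: nil, members: members, partitions: partitions)
# => e.g. {"consumer-a"=>[orders/0, refunds/0], "consumer-b"=>[orders/1]}
# consumer-b is skipped for refunds/0 because it does not subscribe to that topic.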