ruby-kafka 0.7.6 → 1.1.0
This diff shows the changes between publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/.circleci/config.yml +135 -3
- data/.github/workflows/stale.yml +19 -0
- data/CHANGELOG.md +34 -0
- data/README.md +27 -0
- data/lib/kafka/async_producer.rb +3 -0
- data/lib/kafka/broker.rb +12 -0
- data/lib/kafka/client.rb +53 -4
- data/lib/kafka/cluster.rb +52 -0
- data/lib/kafka/compression.rb +13 -11
- data/lib/kafka/compressor.rb +1 -0
- data/lib/kafka/connection.rb +3 -0
- data/lib/kafka/consumer.rb +56 -11
- data/lib/kafka/consumer_group.rb +10 -1
- data/lib/kafka/datadog.rb +18 -11
- data/lib/kafka/fetched_batch.rb +5 -1
- data/lib/kafka/fetched_batch_generator.rb +4 -1
- data/lib/kafka/fetched_message.rb +1 -0
- data/lib/kafka/fetcher.rb +5 -2
- data/lib/kafka/gzip_codec.rb +4 -0
- data/lib/kafka/lz4_codec.rb +4 -0
- data/lib/kafka/offset_manager.rb +12 -1
- data/lib/kafka/producer.rb +20 -1
- data/lib/kafka/prometheus.rb +316 -0
- data/lib/kafka/protocol.rb +8 -0
- data/lib/kafka/protocol/add_offsets_to_txn_request.rb +29 -0
- data/lib/kafka/protocol/add_offsets_to_txn_response.rb +19 -0
- data/lib/kafka/protocol/join_group_request.rb +8 -2
- data/lib/kafka/protocol/metadata_response.rb +1 -1
- data/lib/kafka/protocol/offset_fetch_request.rb +3 -1
- data/lib/kafka/protocol/produce_request.rb +3 -1
- data/lib/kafka/protocol/record_batch.rb +5 -4
- data/lib/kafka/protocol/txn_offset_commit_request.rb +46 -0
- data/lib/kafka/protocol/txn_offset_commit_response.rb +18 -0
- data/lib/kafka/sasl/scram.rb +15 -12
- data/lib/kafka/snappy_codec.rb +4 -0
- data/lib/kafka/ssl_context.rb +5 -1
- data/lib/kafka/ssl_socket_with_timeout.rb +1 -0
- data/lib/kafka/tagged_logger.rb +25 -20
- data/lib/kafka/transaction_manager.rb +25 -0
- data/lib/kafka/version.rb +1 -1
- data/lib/kafka/zstd_codec.rb +27 -0
- data/ruby-kafka.gemspec +5 -3
- metadata +48 -7
data/lib/kafka/cluster.rb
CHANGED
@@ -45,6 +45,10 @@ module Kafka
       new_topics = topics - @target_topics
 
       unless new_topics.empty?
+        if new_topics.any? { |topic| topic.nil? or topic.empty? }
+          raise ArgumentError, "Topic must not be nil or empty"
+        end
+
         @logger.info "New topics added to target list: #{new_topics.to_a.join(', ')}"
 
         @target_topics.merge(new_topics)
@@ -139,6 +143,40 @@ module Kafka
       end
     end
 
+    def describe_configs(broker_id, configs = [])
+      options = {
+        resources: [[Kafka::Protocol::RESOURCE_TYPE_CLUSTER, broker_id.to_s, configs]]
+      }
+
+      info = cluster_info.brokers.find {|broker| broker.node_id == broker_id }
+      broker = @broker_pool.connect(info.host, info.port, node_id: info.node_id)
+
+      response = broker.describe_configs(**options)
+
+      response.resources.each do |resource|
+        Protocol.handle_error(resource.error_code, resource.error_message)
+      end
+
+      response.resources.first.configs
+    end
+
+    def alter_configs(broker_id, configs = [])
+      options = {
+        resources: [[Kafka::Protocol::RESOURCE_TYPE_CLUSTER, broker_id.to_s, configs]]
+      }
+
+      info = cluster_info.brokers.find {|broker| broker.node_id == broker_id }
+      broker = @broker_pool.connect(info.host, info.port, node_id: info.node_id)
+
+      response = broker.alter_configs(**options)
+
+      response.resources.each do |resource|
+        Protocol.handle_error(resource.error_code, resource.error_message)
+      end
+
+      nil
+    end
+
     def partitions_for(topic)
       add_target_topics([topic])
       refresh_metadata_if_necessary!
@@ -252,6 +290,20 @@ module Kafka
       group
     end
 
+    def fetch_group_offsets(group_id)
+      topics = get_group_coordinator(group_id: group_id)
+        .fetch_offsets(group_id: group_id, topics: nil)
+        .topics
+
+      topics.each do |_, partitions|
+        partitions.each do |_, response|
+          Protocol.handle_error(response.error_code)
+        end
+      end
+
+      topics
+    end
+
     def create_partitions_for(name, num_partitions:, timeout:)
       options = {
         topics: [[name, num_partitions, nil]],
data/lib/kafka/compression.rb
CHANGED
@@ -3,27 +3,27 @@
 require "kafka/snappy_codec"
 require "kafka/gzip_codec"
 require "kafka/lz4_codec"
+require "kafka/zstd_codec"
 
 module Kafka
   module Compression
-    CODEC_NAMES = {
-      1 => :gzip,
-      2 => :snappy,
-      3 => :lz4,
-    }.freeze
-
-    CODECS = {
+    CODECS_BY_NAME = {
       :gzip => GzipCodec.new,
       :snappy => SnappyCodec.new,
       :lz4 => LZ4Codec.new,
+      :zstd => ZstdCodec.new,
     }.freeze
 
+    CODECS_BY_ID = CODECS_BY_NAME.each_with_object({}) do |(_, codec), hash|
+      hash[codec.codec_id] = codec
+    end.freeze
+
     def self.codecs
-      CODEC_NAMES.values
+      CODECS_BY_NAME.keys
     end
 
     def self.find_codec(name)
-      codec = CODECS.fetch(name) do
+      codec = CODECS_BY_NAME.fetch(name) do
         raise "Unknown compression codec #{name}"
       end
 
@@ -33,11 +33,13 @@ module Kafka
     end
 
     def self.find_codec_by_id(codec_id)
-      codec_name = CODEC_NAMES.fetch(codec_id) do
+      codec = CODECS_BY_ID.fetch(codec_id) do
         raise "Unknown codec id #{codec_id}"
       end
 
-      find_codec(codec_name)
+      codec.load
+
+      codec
     end
   end
 end
data/lib/kafka/compressor.rb
CHANGED
data/lib/kafka/connection.rb
CHANGED
data/lib/kafka/consumer.rb
CHANGED
@@ -44,7 +44,7 @@ module Kafka
   #
   class Consumer
 
-    def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:, session_timeout:, heartbeat:)
+    def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:, session_timeout:, heartbeat:, refresh_topic_interval: 0)
       @cluster = cluster
       @logger = TaggedLogger.new(logger)
       @instrumenter = instrumenter
@@ -53,6 +53,7 @@ module Kafka
       @session_timeout = session_timeout
       @fetcher = fetcher
       @heartbeat = heartbeat
+      @refresh_topic_interval = refresh_topic_interval
 
       @pauses = Hash.new {|h, k|
         h[k] = Hash.new {|h2, k2|
@@ -73,6 +74,15 @@ module Kafka
       # when user commits message other than last in a batch, this would make ruby-kafka refetch
       # some already consumed messages
       @current_offsets = Hash.new { |h, k| h[k] = {} }
+
+      # Map storing subscribed topics with their configuration
+      @subscribed_topics = Concurrent::Map.new
+
+      # Set storing topics that matched topics in @subscribed_topics
+      @matched_topics = Set.new
+
+      # Whether join_group must be executed again because new topics are added
+      @join_group_for_new_topics = false
     end
 
     # Subscribes the consumer to a topic.
@@ -97,13 +107,12 @@ module Kafka
     def subscribe(topic_or_regex, default_offset: nil, start_from_beginning: true, max_bytes_per_partition: 1048576)
       default_offset ||= start_from_beginning ? :earliest : :latest
 
-      if topic_or_regex.is_a?(Regexp)
-        cluster_topics.select { |topic| topic =~ topic_or_regex }.each do |topic|
-          subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
-        end
-      else
-        subscribe_to_topic(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
-      end
+      @subscribed_topics[topic_or_regex] = {
+        default_offset: default_offset,
+        start_from_beginning: start_from_beginning,
+        max_bytes_per_partition: max_bytes_per_partition
+      }
+      scan_for_subscribing
 
       nil
     end
@@ -116,7 +125,6 @@ module Kafka
     def stop
       @running = false
       @fetcher.stop
-      @cluster.disconnect
     end
 
     # Pause processing of a specific topic partition.
@@ -308,6 +316,7 @@ module Kafka
             topic: batch.topic,
             partition: batch.partition,
             last_offset: batch.last_offset,
+            last_create_time: batch.messages.last.try(:create_time),
             offset_lag: batch.offset_lag,
            highwater_mark_offset: batch.highwater_mark_offset,
            message_count: batch.messages.count,
@@ -401,6 +410,7 @@ module Kafka
       while running?
         begin
           @instrumenter.instrument("loop.consumer") do
+            refresh_topic_list_if_enabled
             yield
           end
         rescue HeartbeatError
@@ -432,6 +442,7 @@ module Kafka
       # important that members explicitly tell Kafka when they're leaving.
       make_final_offsets_commit!
       @group.leave rescue nil
+      @cluster.disconnect
       @running = false
       @logger.pop_tags
     end
@@ -452,6 +463,8 @@ module Kafka
     end
 
     def join_group
+      @join_group_for_new_topics = false
+
       old_generation_id = @group.generation_id
 
       @group.join
@@ -513,11 +526,19 @@ module Kafka
       end
     end
 
+    def refresh_topic_list_if_enabled
+      return if @refresh_topic_interval <= 0
+      return if @refreshed_at && @refreshed_at + @refresh_topic_interval > Time.now
+
+      scan_for_subscribing
+      @refreshed_at = Time.now
+    end
+
     def fetch_batches
       # Return early if the consumer has been stopped.
       return [] if shutting_down?
 
-      join_group
+      join_group if !@group.member? || @join_group_for_new_topics
 
       trigger_heartbeat
 
@@ -525,7 +546,7 @@ module Kafka
 
       if !@fetcher.data?
         @logger.debug "No batches to process"
-        sleep 2
+        sleep(@fetcher.max_wait_time || 2)
         []
       else
         tag, message = @fetcher.poll
@@ -571,10 +592,34 @@ module Kafka
       end
     end
 
+    def scan_for_subscribing
+      @subscribed_topics.each do |topic_or_regex, config|
+        default_offset = config.fetch(:default_offset)
+        start_from_beginning = config.fetch(:start_from_beginning)
+        max_bytes_per_partition = config.fetch(:max_bytes_per_partition)
+        if topic_or_regex.is_a?(Regexp)
+          subscribe_to_regex(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+        else
+          subscribe_to_topic(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+        end
+      end
+    end
+
+    def subscribe_to_regex(topic_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+      cluster_topics.select { |topic| topic =~ topic_regex }.each do |topic|
+        subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
+      end
+    end
+
     def subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
+      return if @matched_topics.include?(topic)
+      @matched_topics.add(topic)
+      @join_group_for_new_topics = true
+
       @group.subscribe(topic)
       @offset_manager.set_default_offset(topic, default_offset)
       @fetcher.subscribe(topic, max_bytes_per_partition: max_bytes_per_partition)
+      @cluster.mark_as_stale!
    end
 
    def cluster_topics
data/lib/kafka/consumer_group.rb
CHANGED
@@ -7,11 +7,12 @@ module Kafka
   class ConsumerGroup
     attr_reader :assigned_partitions, :generation_id, :group_id
 
-    def initialize(cluster:, logger:, group_id:, session_timeout:, retention_time:, instrumenter:)
+    def initialize(cluster:, logger:, group_id:, session_timeout:, rebalance_timeout:, retention_time:, instrumenter:)
       @cluster = cluster
       @logger = TaggedLogger.new(logger)
       @group_id = group_id
       @session_timeout = session_timeout
+      @rebalance_timeout = rebalance_timeout
       @instrumenter = instrumenter
       @member_id = ""
       @generation_id = nil
@@ -140,7 +141,9 @@ module Kafka
       response = coordinator.join_group(
         group_id: @group_id,
         session_timeout: @session_timeout,
+        rebalance_timeout: @rebalance_timeout,
         member_id: @member_id,
+        topics: @topics,
       )
 
       Protocol.handle_error(response.error_code)
@@ -158,6 +161,12 @@ module Kafka
       @member_id = ""
       sleep 1
 
+      retry
+    rescue CoordinatorLoadInProgress
+      @logger.error "Coordinator broker still loading, retrying in 1s..."
+
+      sleep 1
+
       retry
     end
 
data/lib/kafka/datadog.rb
CHANGED
@@ -31,7 +31,7 @@ module Kafka
 
     class << self
       def statsd
-        @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags)
+        @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags, socket_path: socket_path)
       end
 
       def statsd=(statsd)
@@ -40,7 +40,7 @@ module Kafka
       end
 
       def host
-        @host ||= default_host
+        @host
       end
 
       def host=(host)
@@ -49,7 +49,7 @@ module Kafka
       end
 
       def port
-        @port ||= default_port
+        @port
       end
 
       def port=(port)
@@ -57,6 +57,15 @@ module Kafka
         clear
       end
 
+      def socket_path
+        @socket_path
+      end
+
+      def socket_path=(socket_path)
+        @socket_path = socket_path
+        clear
+      end
+
       def namespace
         @namespace ||= STATSD_NAMESPACE
       end
@@ -77,14 +86,6 @@ module Kafka
 
       private
 
-      def default_host
-        ::Datadog::Statsd.const_defined?(:Connection) ? ::Datadog::Statsd::Connection::DEFAULT_HOST : ::Datadog::Statsd::DEFAULT_HOST
-      end
-
-      def default_port
-        ::Datadog::Statsd.const_defined?(:Connection) ? ::Datadog::Statsd::Connection::DEFAULT_PORT : ::Datadog::Statsd::DEFAULT_PORT
-      end
-
       def clear
         @statsd && @statsd.close
         @statsd = nil
@@ -168,6 +169,8 @@ module Kafka
       def process_batch(event)
         offset = event.payload.fetch(:last_offset)
         messages = event.payload.fetch(:message_count)
+        create_time = event.payload.fetch(:last_create_time)
+        time_lag = create_time && ((Time.now - create_time) * 1000).to_i
 
         tags = {
           client: event.payload.fetch(:client_id),
@@ -184,6 +187,10 @@ module Kafka
         end
 
         gauge("consumer.offset", offset, tags: tags)
+
+        if time_lag
+          gauge("consumer.time_lag", time_lag, tags: tags)
+        end
      end
 
      def fetch_batch(event)
data/lib/kafka/fetched_batch.rb
CHANGED
@@ -13,18 +13,22 @@ module Kafka
     # @return [Integer]
     attr_reader :last_offset
 
+    # @return [Integer]
+    attr_reader :leader_epoch
+
     # @return [Integer] the offset of the most recent message in the partition.
     attr_reader :highwater_mark_offset
 
     # @return [Array<Kafka::FetchedMessage>]
     attr_accessor :messages
 
-    def initialize(topic:, partition:, highwater_mark_offset:, messages:, last_offset: nil)
+    def initialize(topic:, partition:, highwater_mark_offset:, messages:, last_offset: nil, leader_epoch: nil)
       @topic = topic
       @partition = partition
       @highwater_mark_offset = highwater_mark_offset
       @messages = messages
       @last_offset = last_offset
+      @leader_epoch = leader_epoch
     end
 
     def empty?

data/lib/kafka/fetched_batch_generator.rb
CHANGED
@@ -48,7 +48,7 @@ module Kafka
             partition: @fetched_partition.partition
           )
         end
-      end
+      end.compact
     end
     FetchedBatch.new(
       topic: @topic,
@@ -62,11 +62,13 @@ module Kafka
     def extract_records
       records = []
       last_offset = nil
+      leader_epoch = nil
       aborted_transactions = @fetched_partition.aborted_transactions.sort_by(&:first_offset)
       aborted_producer_ids = {}
 
       @fetched_partition.messages.each do |record_batch|
         last_offset = record_batch.last_offset if last_offset.nil? || last_offset < record_batch.last_offset
+        leader_epoch = record_batch.partition_leader_epoch if leader_epoch.nil? || leader_epoch < record_batch.partition_leader_epoch
         # Find the list of aborted producer IDs less than current offset
         unless aborted_transactions.empty?
           if aborted_transactions.first.first_offset <= record_batch.last_offset
@@ -99,6 +101,7 @@ module Kafka
         topic: @topic,
         partition: @fetched_partition.partition,
         last_offset: last_offset,
+        leader_epoch: leader_epoch,
         highwater_mark_offset: @fetched_partition.highwater_mark_offset,
         messages: records
       )