ruby-kafka 0.7.10 → 1.3.0
- checksums.yaml +4 -4
- data/.circleci/config.yml +99 -0
- data/.github/workflows/stale.yml +19 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +27 -0
- data/README.md +125 -0
- data/lib/kafka/async_producer.rb +24 -12
- data/lib/kafka/client.rb +72 -4
- data/lib/kafka/cluster.rb +52 -0
- data/lib/kafka/connection.rb +3 -0
- data/lib/kafka/consumer.rb +61 -11
- data/lib/kafka/consumer_group.rb +23 -5
- data/lib/kafka/consumer_group/assignor.rb +63 -0
- data/lib/kafka/datadog.rb +20 -13
- data/lib/kafka/fetcher.rb +5 -2
- data/lib/kafka/interceptors.rb +33 -0
- data/lib/kafka/offset_manager.rb +12 -1
- data/lib/kafka/partitioner.rb +1 -1
- data/lib/kafka/producer.rb +13 -5
- data/lib/kafka/prometheus.rb +78 -79
- data/lib/kafka/protocol/join_group_request.rb +8 -2
- data/lib/kafka/protocol/join_group_response.rb +9 -1
- data/lib/kafka/protocol/metadata_response.rb +1 -1
- data/lib/kafka/protocol/offset_fetch_request.rb +3 -1
- data/lib/kafka/round_robin_assignment_strategy.rb +15 -38
- data/lib/kafka/ssl_context.rb +4 -3
- data/lib/kafka/tagged_logger.rb +1 -0
- data/lib/kafka/transaction_manager.rb +13 -8
- data/lib/kafka/version.rb +1 -1
- data/ruby-kafka.gemspec +4 -4
- metadata +21 -13
data/lib/kafka/cluster.rb
CHANGED
@@ -45,6 +45,10 @@ module Kafka
       new_topics = topics - @target_topics

       unless new_topics.empty?
+        if new_topics.any? { |topic| topic.nil? or topic.empty? }
+          raise ArgumentError, "Topic must not be nil or empty"
+        end
+
         @logger.info "New topics added to target list: #{new_topics.to_a.join(', ')}"

         @target_topics.merge(new_topics)
@@ -139,6 +143,40 @@ module Kafka
       end
     end

+    def describe_configs(broker_id, configs = [])
+      options = {
+        resources: [[Kafka::Protocol::RESOURCE_TYPE_CLUSTER, broker_id.to_s, configs]]
+      }
+
+      info = cluster_info.brokers.find {|broker| broker.node_id == broker_id }
+      broker = @broker_pool.connect(info.host, info.port, node_id: info.node_id)
+
+      response = broker.describe_configs(**options)
+
+      response.resources.each do |resource|
+        Protocol.handle_error(resource.error_code, resource.error_message)
+      end
+
+      response.resources.first.configs
+    end
+
+    def alter_configs(broker_id, configs = [])
+      options = {
+        resources: [[Kafka::Protocol::RESOURCE_TYPE_CLUSTER, broker_id.to_s, configs]]
+      }
+
+      info = cluster_info.brokers.find {|broker| broker.node_id == broker_id }
+      broker = @broker_pool.connect(info.host, info.port, node_id: info.node_id)
+
+      response = broker.alter_configs(**options)
+
+      response.resources.each do |resource|
+        Protocol.handle_error(resource.error_code, resource.error_message)
+      end
+
+      nil
+    end
+
     def partitions_for(topic)
       add_target_topics([topic])
       refresh_metadata_if_necessary!
@@ -252,6 +290,20 @@ module Kafka
       group
     end

+    def fetch_group_offsets(group_id)
+      topics = get_group_coordinator(group_id: group_id)
+        .fetch_offsets(group_id: group_id, topics: nil)
+        .topics
+
+      topics.each do |_, partitions|
+        partitions.each do |_, response|
+          Protocol.handle_error(response.error_code)
+        end
+      end
+
+      topics
+    end
+
     def create_partitions_for(name, num_partitions:, timeout:)
       options = {
         topics: [[name, num_partitions, nil]],
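The three new cluster methods above give ruby-kafka basic admin capabilities: reading broker configs, altering them, and fetching a consumer group's committed offsets. A minimal sketch of how they might be called, assuming the delegators added to data/lib/kafka/client.rb (+72 -4 in the summary above) expose matching methods on Kafka::Client; the broker address, broker id, config names, and the pair shape passed to alter_configs are illustrative and worth checking against the protocol encoders:

    require "kafka"

    kafka = Kafka.new(["kafka1:9092"], client_id: "admin")

    # Read selected config entries for broker 1 (RESOURCE_TYPE_CLUSTER scope).
    entries = kafka.describe_configs(1, ["log.retention.hours"])
    entries.each { |entry| puts "#{entry.name} = #{entry.value}" }

    # Alter a config; alter_configs returns nil and raises on a protocol error.
    kafka.alter_configs(1, [["log.retention.hours", "48"]])

    # Committed offsets for a group, keyed by topic, then partition.
    kafka.fetch_group_offsets("my-group").each do |topic, partitions|
      partitions.each { |partition, response| puts "#{topic}/#{partition}: #{response.offset}" }
    end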
data/lib/kafka/connection.rb
CHANGED
data/lib/kafka/consumer.rb
CHANGED
@@ -1,6 +1,7 @@
 # frozen_string_literal: true

 require "kafka/consumer_group"
+require "kafka/interceptors"
 require "kafka/offset_manager"
 require "kafka/fetcher"
 require "kafka/pause"
@@ -44,7 +45,8 @@ module Kafka
   #
   class Consumer

-    def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:, session_timeout:, heartbeat:)
+    def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:,
+                   session_timeout:, heartbeat:, refresh_topic_interval: 0, interceptors: [])
       @cluster = cluster
       @logger = TaggedLogger.new(logger)
       @instrumenter = instrumenter
@@ -53,6 +55,8 @@ module Kafka
       @session_timeout = session_timeout
       @fetcher = fetcher
       @heartbeat = heartbeat
+      @refresh_topic_interval = refresh_topic_interval
+      @interceptors = Interceptors.new(interceptors: interceptors, logger: logger)

       @pauses = Hash.new {|h, k|
         h[k] = Hash.new {|h2, k2|
@@ -73,6 +77,15 @@ module Kafka
       # when user commits message other than last in a batch, this would make ruby-kafka refetch
       # some already consumed messages
       @current_offsets = Hash.new { |h, k| h[k] = {} }
+
+      # Map storing subscribed topics with their configuration
+      @subscribed_topics = Hash.new
+
+      # Set storing topics that matched topics in @subscribed_topics
+      @matched_topics = Set.new
+
+      # Whether join_group must be executed again because new topics are added
+      @join_group_for_new_topics = false
     end

     # Subscribes the consumer to a topic.
@@ -97,13 +110,12 @@ module Kafka
     def subscribe(topic_or_regex, default_offset: nil, start_from_beginning: true, max_bytes_per_partition: 1048576)
       default_offset ||= start_from_beginning ? :earliest : :latest

-      if topic_or_regex.is_a?(Regexp)
-        cluster_topics.select { |topic| topic =~ topic_or_regex }.each do |topic|
-          subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
-        end
-      else
-        subscribe_to_topic(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
-      end
+      @subscribed_topics[topic_or_regex] = {
+        default_offset: default_offset,
+        start_from_beginning: start_from_beginning,
+        max_bytes_per_partition: max_bytes_per_partition
+      }
+      scan_for_subscribing

       nil
     end
@@ -116,7 +128,6 @@ module Kafka
     def stop
       @running = false
       @fetcher.stop
-      @cluster.disconnect
     end

     # Pause processing of a specific topic partition.
@@ -212,6 +223,7 @@ module Kafka
       batches = fetch_batches

       batches.each do |batch|
+        batch = @interceptors.call(batch)
         batch.messages.each do |message|
           notification = {
             topic: message.topic,
@@ -303,11 +315,13 @@ module Kafka
         unless batch.empty?
           raw_messages = batch.messages
           batch.messages = raw_messages.reject(&:is_control_record)
+          batch = @interceptors.call(batch)

           notification = {
             topic: batch.topic,
             partition: batch.partition,
             last_offset: batch.last_offset,
+            last_create_time: batch.messages.last && batch.messages.last.create_time,
             offset_lag: batch.offset_lag,
             highwater_mark_offset: batch.highwater_mark_offset,
             message_count: batch.messages.count,
@@ -401,6 +415,7 @@ module Kafka
       while running?
         begin
           @instrumenter.instrument("loop.consumer") do
+            refresh_topic_list_if_enabled
             yield
           end
         rescue HeartbeatError
@@ -432,6 +447,7 @@ module Kafka
       # important that members explicitly tell Kafka when they're leaving.
       make_final_offsets_commit!
       @group.leave rescue nil
+      @cluster.disconnect
       @running = false
       @logger.pop_tags
     end
@@ -452,6 +468,8 @@ module Kafka
     end

     def join_group
+      @join_group_for_new_topics = false
+
       old_generation_id = @group.generation_id

       @group.join
@@ -513,11 +531,19 @@ module Kafka
       end
     end

+    def refresh_topic_list_if_enabled
+      return if @refresh_topic_interval <= 0
+      return if @refreshed_at && @refreshed_at + @refresh_topic_interval > Time.now
+
+      scan_for_subscribing
+      @refreshed_at = Time.now
+    end
+
     def fetch_batches
       # Return early if the consumer has been stopped.
       return [] if shutting_down?

-      join_group
+      join_group if !@group.member? || @join_group_for_new_topics

       trigger_heartbeat
@@ -525,7 +551,7 @@ module Kafka

       if !@fetcher.data?
         @logger.debug "No batches to process"
-        sleep 2
+        sleep(@fetcher.max_wait_time || 2)
         []
       else
         tag, message = @fetcher.poll
@@ -571,10 +597,34 @@ module Kafka
       end
     end

+    def scan_for_subscribing
+      @subscribed_topics.each do |topic_or_regex, config|
+        default_offset = config.fetch(:default_offset)
+        start_from_beginning = config.fetch(:start_from_beginning)
+        max_bytes_per_partition = config.fetch(:max_bytes_per_partition)
+        if topic_or_regex.is_a?(Regexp)
+          subscribe_to_regex(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+        else
+          subscribe_to_topic(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+        end
+      end
+    end
+
+    def subscribe_to_regex(topic_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+      cluster_topics.select { |topic| topic =~ topic_regex }.each do |topic|
+        subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
+      end
+    end
+
     def subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
+      return if @matched_topics.include?(topic)
+      @matched_topics.add(topic)
+      @join_group_for_new_topics = true
+
       @group.subscribe(topic)
       @offset_manager.set_default_offset(topic, default_offset)
       @fetcher.subscribe(topic, max_bytes_per_partition: max_bytes_per_partition)
+      @cluster.mark_as_stale!
     end

     def cluster_topics
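Taken together, the consumer changes add two opt-in features: interceptors that see every fetched batch before processing, and periodic re-scanning of the topic list so regexp subscriptions pick up topics created after startup. A sketch of how they might be wired up, assuming Kafka::Client#consumer forwards the refresh_topic_interval and interceptors options per the client.rb changes in the summary; the interceptor class and topic pattern are illustrative, and an interceptor only needs to respond to #call, receiving a batch and returning it, possibly modified:

    # Hypothetical interceptor: accepts a fetched batch and must return one.
    class LoggingInterceptor
      def call(batch)
        puts "intercepted #{batch.messages.count} messages from #{batch.topic}"
        batch
      end
    end

    kafka = Kafka.new(["kafka1:9092"], client_id: "my-app")
    consumer = kafka.consumer(
      group_id: "my-group",
      refresh_topic_interval: 30,            # re-run the topic scan every 30 seconds; 0 disables it
      interceptors: [LoggingInterceptor.new]
    )
    consumer.subscribe(/events-.*/)           # topics created later are matched on refresh
    consumer.each_message { |message| puts message.value }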
data/lib/kafka/consumer_group.rb
CHANGED
@@ -1,24 +1,29 @@
 # frozen_string_literal: true

 require "set"
+require "kafka/consumer_group/assignor"
 require "kafka/round_robin_assignment_strategy"

 module Kafka
   class ConsumerGroup
     attr_reader :assigned_partitions, :generation_id, :group_id

-    def initialize(cluster:, logger:, group_id:, session_timeout:, retention_time:, instrumenter:)
+    def initialize(cluster:, logger:, group_id:, session_timeout:, rebalance_timeout:, retention_time:, instrumenter:, assignment_strategy:)
       @cluster = cluster
       @logger = TaggedLogger.new(logger)
       @group_id = group_id
       @session_timeout = session_timeout
+      @rebalance_timeout = rebalance_timeout
       @instrumenter = instrumenter
       @member_id = ""
       @generation_id = nil
       @members = {}
       @topics = Set.new
       @assigned_partitions = {}
-      @assignment_strategy = RoundRobinAssignmentStrategy.new(cluster: @cluster)
+      @assignor = Assignor.new(
+        cluster: cluster,
+        strategy: assignment_strategy || RoundRobinAssignmentStrategy.new
+      )
       @retention_time = retention_time
     end
@@ -112,9 +117,12 @@ module Kafka

         Protocol.handle_error(response.error_code)
       end
-    rescue ConnectionError, UnknownMemberId, RebalanceInProgress, IllegalGeneration => e
+    rescue ConnectionError, UnknownMemberId, IllegalGeneration => e
       @logger.error "Error sending heartbeat: #{e}"
       raise HeartbeatError, e
+    rescue RebalanceInProgress => e
+      @logger.warn "Error sending heartbeat: #{e}"
+      raise HeartbeatError, e
     rescue NotCoordinatorForGroup
       @logger.error "Failed to find coordinator for group `#{@group_id}`; retrying..."
       sleep 1
@@ -140,7 +148,11 @@ module Kafka
       response = coordinator.join_group(
         group_id: @group_id,
         session_timeout: @session_timeout,
+        rebalance_timeout: @rebalance_timeout,
         member_id: @member_id,
+        topics: @topics,
+        protocol_name: @assignor.protocol_name,
+        user_data: @assignor.user_data,
       )

       Protocol.handle_error(response.error_code)
@@ -158,6 +170,12 @@ module Kafka
       @member_id = ""
       sleep 1

+      retry
+    rescue CoordinatorLoadInProgress
+      @logger.error "Coordinator broker still loading, retrying in 1s..."
+
+      sleep 1
+
       retry
     end

@@ -171,8 +189,8 @@ module Kafka
       if group_leader?
         @logger.info "Chosen as leader of group `#{@group_id}`"

-        group_assignment = @assignment_strategy.assign(
-          members: @members.keys,
+        group_assignment = @assignor.assign(
+          members: @members,
           topics: @topics,
         )
       end
data/lib/kafka/consumer_group/assignor.rb
ADDED
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+require "kafka/protocol/member_assignment"
+
+module Kafka
+  class ConsumerGroup
+
+    # A consumer group partition assignor
+    class Assignor
+      Partition = Struct.new(:topic, :partition_id)
+
+      # @param cluster [Kafka::Cluster]
+      # @param strategy [Object] an object that implements #protocol_type,
+      #   #user_data, and #assign.
+      def initialize(cluster:, strategy:)
+        @cluster = cluster
+        @strategy = strategy
+      end
+
+      def protocol_name
+        @strategy.respond_to?(:protocol_name) ? @strategy.protocol_name : @strategy.class.to_s
+      end
+
+      def user_data
+        @strategy.user_data if @strategy.respond_to?(:user_data)
+      end
+
+      # Assign the topic partitions to the group members.
+      #
+      # @param members [Hash<String, Kafka::Protocol::JoinGroupResponse::Metadata>] a hash
+      #   mapping member ids to metadata.
+      # @param topics [Array<String>] topics
+      # @return [Hash<String, Kafka::Protocol::MemberAssignment>] a hash mapping member
+      #   ids to assignments.
+      def assign(members:, topics:)
+        topic_partitions = topics.flat_map do |topic|
+          begin
+            partition_ids = @cluster.partitions_for(topic).map(&:partition_id)
+          rescue UnknownTopicOrPartition
+            raise UnknownTopicOrPartition, "unknown topic #{topic}"
+          end
+          partition_ids.map {|partition_id| Partition.new(topic, partition_id) }
+        end
+
+        group_assignment = {}
+
+        members.each_key do |member_id|
+          group_assignment[member_id] = Protocol::MemberAssignment.new
+        end
+        @strategy.call(cluster: @cluster, members: members, partitions: topic_partitions).each do |member_id, partitions|
+          Array(partitions).each do |partition|
+            group_assignment[member_id].assign(partition.topic, [partition.partition_id])
+          end
+        end
+
+        group_assignment
+      rescue Kafka::LeaderNotAvailable
+        sleep 1
+        retry
+      end
+    end
+  end
+end
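The Assignor decouples group coordination from the assignment algorithm: any object implementing #call (plus optional #protocol_name and #user_data, which fall back to the class name and nil) can serve as a strategy. A minimal sketch of a custom strategy under those assumptions, assuming the client forwards an assignment_strategy option through to ConsumerGroup per the signature change above; the class name and the spread-by-index logic are illustrative. #call receives the cluster, the members hash, and the flattened Partition list, and must return a hash mapping member ids to their partitions:

    class EvenSpreadStrategy
      def protocol_name
        "even_spread"                  # optional; Assignor falls back to the class name
      end

      def user_data
        nil                            # optional opaque bytes sent with the join request
      end

      def call(cluster:, members:, partitions:)
        assignments = Hash.new { |hash, member_id| hash[member_id] = [] }
        member_ids = members.keys
        partitions.each_with_index do |partition, index|
          assignments[member_ids[index % member_ids.size]] << partition
        end
        assignments
      end
    end

    consumer = kafka.consumer(
      group_id: "my-group",
      assignment_strategy: EvenSpreadStrategy.new
    )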
data/lib/kafka/datadog.rb
CHANGED
@@ -31,7 +31,7 @@ module Kafka

     class << self
       def statsd
-        @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags)
+        @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags, socket_path: socket_path)
       end

       def statsd=(statsd)
@@ -40,7 +40,7 @@ module Kafka
       end

       def host
-        @host ||= default_host
+        @host
       end

       def host=(host)
@@ -49,7 +49,7 @@ module Kafka
       end

       def port
-        @port ||= default_port
+        @port
       end

       def port=(port)
@@ -57,6 +57,15 @@ module Kafka
         clear
       end

+      def socket_path
+        @socket_path
+      end
+
+      def socket_path=(socket_path)
+        @socket_path = socket_path
+        clear
+      end
+
       def namespace
         @namespace ||= STATSD_NAMESPACE
       end
@@ -77,14 +86,6 @@ module Kafka

       private

-      def default_host
-        ::Datadog::Statsd.const_defined?(:Connection) ? ::Datadog::Statsd::Connection::DEFAULT_HOST : ::Datadog::Statsd::DEFAULT_HOST
-      end
-
-      def default_port
-        ::Datadog::Statsd.const_defined?(:Connection) ? ::Datadog::Statsd::Connection::DEFAULT_PORT : ::Datadog::Statsd::DEFAULT_PORT
-      end
-
       def clear
         @statsd && @statsd.close
         @statsd = nil
@@ -95,8 +96,8 @@ module Kafka
       private

       %w[increment histogram count timing gauge].each do |type|
-        define_method(type) do |*args|
-          emit(type, *args)
+        define_method(type) do |*args, **kwargs|
+          emit(type, *args, **kwargs)
         end
       end

@@ -168,6 +169,8 @@ module Kafka
     def process_batch(event)
       offset = event.payload.fetch(:last_offset)
       messages = event.payload.fetch(:message_count)
+      create_time = event.payload.fetch(:last_create_time)
+      time_lag = create_time && ((Time.now - create_time) * 1000).to_i

       tags = {
         client: event.payload.fetch(:client_id),
@@ -184,6 +187,10 @@ module Kafka
       end

       gauge("consumer.offset", offset, tags: tags)
+
+      if time_lag
+        gauge("consumer.time_lag", time_lag, tags: tags)
+      end
     end

     def fetch_batch(event)