ruby-kafka 0.7.9 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +135 -3
- data/.github/workflows/stale.yml +19 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +26 -0
- data/README.md +46 -0
- data/lib/kafka/async_producer.rb +5 -2
- data/lib/kafka/client.rb +68 -4
- data/lib/kafka/cluster.rb +52 -0
- data/lib/kafka/connection.rb +3 -0
- data/lib/kafka/consumer.rb +61 -11
- data/lib/kafka/consumer_group.rb +10 -1
- data/lib/kafka/datadog.rb +20 -13
- data/lib/kafka/fetcher.rb +5 -2
- data/lib/kafka/interceptors.rb +33 -0
- data/lib/kafka/offset_manager.rb +12 -1
- data/lib/kafka/partitioner.rb +1 -1
- data/lib/kafka/producer.rb +13 -5
- data/lib/kafka/prometheus.rb +78 -79
- data/lib/kafka/protocol/join_group_request.rb +8 -2
- data/lib/kafka/protocol/metadata_response.rb +1 -1
- data/lib/kafka/protocol/offset_fetch_request.rb +3 -1
- data/lib/kafka/protocol/record_batch.rb +5 -4
- data/lib/kafka/ssl_context.rb +4 -3
- data/lib/kafka/tagged_logger.rb +3 -2
- data/lib/kafka/version.rb +1 -1
- data/ruby-kafka.gemspec +4 -4
- metadata +19 -11
data/lib/kafka/cluster.rb
CHANGED
@@ -45,6 +45,10 @@ module Kafka
|
|
45
45
|
new_topics = topics - @target_topics
|
46
46
|
|
47
47
|
unless new_topics.empty?
|
48
|
+
if new_topics.any? { |topic| topic.nil? or topic.empty? }
|
49
|
+
raise ArgumentError, "Topic must not be nil or empty"
|
50
|
+
end
|
51
|
+
|
48
52
|
@logger.info "New topics added to target list: #{new_topics.to_a.join(', ')}"
|
49
53
|
|
50
54
|
@target_topics.merge(new_topics)
|
@@ -139,6 +143,40 @@ module Kafka
|
|
139
143
|
end
|
140
144
|
end
|
141
145
|
|
146
|
+
# Asks a single broker to describe its configuration.
#
# The broker is looked up by node id in the current cluster metadata and a
# connection to it is obtained from the broker pool. Every resource in the
# response is passed through Protocol.handle_error before anything is returned.
#
# @param broker_id the node id of the broker to query; compared against each
#   broker's `node_id` and sent to the broker as a string.
# @param configs [Array] forwarded unchanged as the third element of the
#   cluster resource tuple (defaults to an empty list).
# @raise [ArgumentError] if no broker with the given id is in the cluster metadata.
# @return the `configs` of the first resource in the broker's response.
def describe_configs(broker_id, configs = [])
  info = cluster_info.brokers.find { |broker| broker.node_id == broker_id }

  # Without this guard an unknown id surfaces as a NoMethodError on nil below.
  raise ArgumentError, "Unknown broker id: #{broker_id.inspect}" if info.nil?

  broker = @broker_pool.connect(info.host, info.port, node_id: info.node_id)

  response = broker.describe_configs(
    resources: [[Kafka::Protocol::RESOURCE_TYPE_CLUSTER, broker_id.to_s, configs]]
  )

  response.resources.each do |resource|
    Protocol.handle_error(resource.error_code, resource.error_message)
  end

  response.resources.first.configs
end
|
162
|
+
|
163
|
+
# Asks a single broker to alter its configuration.
#
# The broker is looked up by node id in the current cluster metadata and a
# connection to it is obtained from the broker pool. Every resource in the
# response is passed through Protocol.handle_error.
#
# @param broker_id the node id of the broker to alter; compared against each
#   broker's `node_id` and sent to the broker as a string.
# @param configs [Array] forwarded unchanged as the third element of the
#   cluster resource tuple (defaults to an empty list).
# @raise [ArgumentError] if no broker with the given id is in the cluster metadata.
# @return [nil]
def alter_configs(broker_id, configs = [])
  info = cluster_info.brokers.find { |broker| broker.node_id == broker_id }

  # Without this guard an unknown id surfaces as a NoMethodError on nil below.
  raise ArgumentError, "Unknown broker id: #{broker_id.inspect}" if info.nil?

  broker = @broker_pool.connect(info.host, info.port, node_id: info.node_id)

  response = broker.alter_configs(
    resources: [[Kafka::Protocol::RESOURCE_TYPE_CLUSTER, broker_id.to_s, configs]]
  )

  response.resources.each do |resource|
    Protocol.handle_error(resource.error_code, resource.error_message)
  end

  nil
end
|
179
|
+
|
142
180
|
def partitions_for(topic)
|
143
181
|
add_target_topics([topic])
|
144
182
|
refresh_metadata_if_necessary!
|
@@ -252,6 +290,20 @@ module Kafka
|
|
252
290
|
group
|
253
291
|
end
|
254
292
|
|
293
|
+
# Fetches the offsets stored for a consumer group from its group coordinator.
#
# The request is sent with `topics: nil`. Each partition entry of the response
# is checked with Protocol.handle_error before the result is handed back.
#
# @param group_id the id of the consumer group to look up.
# @return the `topics` collection of the coordinator's fetch_offsets response.
def fetch_group_offsets(group_id)
  coordinator = get_group_coordinator(group_id: group_id)
  offsets_response = coordinator.fetch_offsets(group_id: group_id, topics: nil)
  offsets_by_topic = offsets_response.topics

  offsets_by_topic.each do |_topic, partitions|
    partitions.each do |_partition, offset_info|
      Protocol.handle_error(offset_info.error_code)
    end
  end

  offsets_by_topic
end
|
306
|
+
|
255
307
|
def create_partitions_for(name, num_partitions:, timeout:)
|
256
308
|
options = {
|
257
309
|
topics: [[name, num_partitions, nil]],
|
data/lib/kafka/connection.rb
CHANGED
data/lib/kafka/consumer.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require "kafka/consumer_group"
|
4
|
+
require "kafka/interceptors"
|
4
5
|
require "kafka/offset_manager"
|
5
6
|
require "kafka/fetcher"
|
6
7
|
require "kafka/pause"
|
@@ -44,7 +45,8 @@ module Kafka
|
|
44
45
|
#
|
45
46
|
class Consumer
|
46
47
|
|
47
|
-
def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:,
|
48
|
+
def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:,
|
49
|
+
session_timeout:, heartbeat:, refresh_topic_interval: 0, interceptors: [])
|
48
50
|
@cluster = cluster
|
49
51
|
@logger = TaggedLogger.new(logger)
|
50
52
|
@instrumenter = instrumenter
|
@@ -53,6 +55,8 @@ module Kafka
|
|
53
55
|
@session_timeout = session_timeout
|
54
56
|
@fetcher = fetcher
|
55
57
|
@heartbeat = heartbeat
|
58
|
+
@refresh_topic_interval = refresh_topic_interval
|
59
|
+
@interceptors = Interceptors.new(interceptors: interceptors, logger: logger)
|
56
60
|
|
57
61
|
@pauses = Hash.new {|h, k|
|
58
62
|
h[k] = Hash.new {|h2, k2|
|
@@ -73,6 +77,15 @@ module Kafka
|
|
73
77
|
# when user commits message other than last in a batch, this would make ruby-kafka refetch
|
74
78
|
# some already consumed messages
|
75
79
|
@current_offsets = Hash.new { |h, k| h[k] = {} }
|
80
|
+
|
81
|
+
# Map storing subscribed topics with their configuration
|
82
|
+
@subscribed_topics = Hash.new
|
83
|
+
|
84
|
+
# Set storing topics that matched topics in @subscribed_topics
|
85
|
+
@matched_topics = Set.new
|
86
|
+
|
87
|
+
# Whether join_group must be executed again because new topics are added
|
88
|
+
@join_group_for_new_topics = false
|
76
89
|
end
|
77
90
|
|
78
91
|
# Subscribes the consumer to a topic.
|
@@ -97,13 +110,12 @@ module Kafka
|
|
97
110
|
def subscribe(topic_or_regex, default_offset: nil, start_from_beginning: true, max_bytes_per_partition: 1048576)
|
98
111
|
default_offset ||= start_from_beginning ? :earliest : :latest
|
99
112
|
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
end
|
113
|
+
@subscribed_topics[topic_or_regex] = {
|
114
|
+
default_offset: default_offset,
|
115
|
+
start_from_beginning: start_from_beginning,
|
116
|
+
max_bytes_per_partition: max_bytes_per_partition
|
117
|
+
}
|
118
|
+
scan_for_subscribing
|
107
119
|
|
108
120
|
nil
|
109
121
|
end
|
@@ -116,7 +128,6 @@ module Kafka
|
|
116
128
|
def stop
|
117
129
|
@running = false
|
118
130
|
@fetcher.stop
|
119
|
-
@cluster.disconnect
|
120
131
|
end
|
121
132
|
|
122
133
|
# Pause processing of a specific topic partition.
|
@@ -212,6 +223,7 @@ module Kafka
|
|
212
223
|
batches = fetch_batches
|
213
224
|
|
214
225
|
batches.each do |batch|
|
226
|
+
batch = @interceptors.call(batch)
|
215
227
|
batch.messages.each do |message|
|
216
228
|
notification = {
|
217
229
|
topic: message.topic,
|
@@ -303,11 +315,13 @@ module Kafka
|
|
303
315
|
unless batch.empty?
|
304
316
|
raw_messages = batch.messages
|
305
317
|
batch.messages = raw_messages.reject(&:is_control_record)
|
318
|
+
batch = @interceptors.call(batch)
|
306
319
|
|
307
320
|
notification = {
|
308
321
|
topic: batch.topic,
|
309
322
|
partition: batch.partition,
|
310
323
|
last_offset: batch.last_offset,
|
324
|
+
last_create_time: batch.messages.last && batch.messages.last.create_time,
|
311
325
|
offset_lag: batch.offset_lag,
|
312
326
|
highwater_mark_offset: batch.highwater_mark_offset,
|
313
327
|
message_count: batch.messages.count,
|
@@ -401,6 +415,7 @@ module Kafka
|
|
401
415
|
while running?
|
402
416
|
begin
|
403
417
|
@instrumenter.instrument("loop.consumer") do
|
418
|
+
refresh_topic_list_if_enabled
|
404
419
|
yield
|
405
420
|
end
|
406
421
|
rescue HeartbeatError
|
@@ -432,6 +447,7 @@ module Kafka
|
|
432
447
|
# important that members explicitly tell Kafka when they're leaving.
|
433
448
|
make_final_offsets_commit!
|
434
449
|
@group.leave rescue nil
|
450
|
+
@cluster.disconnect
|
435
451
|
@running = false
|
436
452
|
@logger.pop_tags
|
437
453
|
end
|
@@ -452,6 +468,8 @@ module Kafka
|
|
452
468
|
end
|
453
469
|
|
454
470
|
def join_group
|
471
|
+
@join_group_for_new_topics = false
|
472
|
+
|
455
473
|
old_generation_id = @group.generation_id
|
456
474
|
|
457
475
|
@group.join
|
@@ -513,11 +531,19 @@ module Kafka
|
|
513
531
|
end
|
514
532
|
end
|
515
533
|
|
534
|
+
# Re-scans the subscriptions when periodic topic refresh is switched on.
#
# Does nothing when @refresh_topic_interval is zero or negative, or when the
# previous refresh happened less than @refresh_topic_interval seconds ago.
# NOTE(review): elapsed time is measured with Time.now (wall clock), so a
# system clock adjustment can shift when the next refresh happens.
def refresh_topic_list_if_enabled
  return if @refresh_topic_interval <= 0

  next_refresh_due = @refreshed_at && @refreshed_at + @refresh_topic_interval
  return if next_refresh_due && next_refresh_due > Time.now

  scan_for_subscribing
  @refreshed_at = Time.now
end
|
541
|
+
|
516
542
|
def fetch_batches
|
517
543
|
# Return early if the consumer has been stopped.
|
518
544
|
return [] if shutting_down?
|
519
545
|
|
520
|
-
join_group
|
546
|
+
join_group if !@group.member? || @join_group_for_new_topics
|
521
547
|
|
522
548
|
trigger_heartbeat
|
523
549
|
|
@@ -525,7 +551,7 @@ module Kafka
|
|
525
551
|
|
526
552
|
if !@fetcher.data?
|
527
553
|
@logger.debug "No batches to process"
|
528
|
-
sleep 2
|
554
|
+
sleep(@fetcher.max_wait_time || 2)
|
529
555
|
[]
|
530
556
|
else
|
531
557
|
tag, message = @fetcher.poll
|
@@ -571,10 +597,34 @@ module Kafka
|
|
571
597
|
end
|
572
598
|
end
|
573
599
|
|
600
|
+
# Walks every entry recorded in @subscribed_topics and (re)applies it.
#
# Regexp keys are handed to subscribe_to_regex, any other key to
# subscribe_to_topic, together with the settings stored for that entry.
def scan_for_subscribing
  @subscribed_topics.each do |pattern, settings|
    # fetch (not []) so that a missing setting raises KeyError.
    subscription_args = %i[default_offset start_from_beginning max_bytes_per_partition].map do |key|
      settings.fetch(key)
    end

    case pattern
    when Regexp
      subscribe_to_regex(pattern, *subscription_args)
    else
      subscribe_to_topic(pattern, *subscription_args)
    end
  end
end
|
612
|
+
|
613
|
+
# Subscribes to every topic returned by cluster_topics whose name matches
# the given regular expression.
#
# @param topic_regex [Regexp] pattern the topic names are matched against.
# @param default_offset passed through to subscribe_to_topic.
# @param start_from_beginning passed through to subscribe_to_topic.
# @param max_bytes_per_partition passed through to subscribe_to_topic.
def subscribe_to_regex(topic_regex, default_offset, start_from_beginning, max_bytes_per_partition)
  matching_topics = cluster_topics.select { |name| name =~ topic_regex }

  matching_topics.each do |name|
    subscribe_to_topic(name, default_offset, start_from_beginning, max_bytes_per_partition)
  end
end
|
618
|
+
|
574
619
|
# Subscribes to a single topic, at most once per topic.
#
# A topic already present in @matched_topics is ignored. For a new topic the
# group, offset manager and fetcher are each told about it, a re-join of the
# group is requested via @join_group_for_new_topics, and the cluster metadata
# is marked as stale.
#
# @param topic the topic name.
# @param default_offset handed to the offset manager as the topic's default offset.
# @param start_from_beginning accepted for signature compatibility; not read here.
# @param max_bytes_per_partition handed to the fetcher for this topic.
def subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
  # Set#add? returns nil when the element was already in the set.
  return unless @matched_topics.add?(topic)

  @join_group_for_new_topics = true

  @group.subscribe(topic)
  @offset_manager.set_default_offset(topic, default_offset)
  @fetcher.subscribe(topic, max_bytes_per_partition: max_bytes_per_partition)
  @cluster.mark_as_stale!
end
|
579
629
|
|
580
630
|
def cluster_topics
|
data/lib/kafka/consumer_group.rb
CHANGED
@@ -7,11 +7,12 @@ module Kafka
|
|
7
7
|
class ConsumerGroup
|
8
8
|
attr_reader :assigned_partitions, :generation_id, :group_id
|
9
9
|
|
10
|
-
def initialize(cluster:, logger:, group_id:, session_timeout:, retention_time:, instrumenter:)
|
10
|
+
def initialize(cluster:, logger:, group_id:, session_timeout:, rebalance_timeout:, retention_time:, instrumenter:)
|
11
11
|
@cluster = cluster
|
12
12
|
@logger = TaggedLogger.new(logger)
|
13
13
|
@group_id = group_id
|
14
14
|
@session_timeout = session_timeout
|
15
|
+
@rebalance_timeout = rebalance_timeout
|
15
16
|
@instrumenter = instrumenter
|
16
17
|
@member_id = ""
|
17
18
|
@generation_id = nil
|
@@ -140,7 +141,9 @@ module Kafka
|
|
140
141
|
response = coordinator.join_group(
|
141
142
|
group_id: @group_id,
|
142
143
|
session_timeout: @session_timeout,
|
144
|
+
rebalance_timeout: @rebalance_timeout,
|
143
145
|
member_id: @member_id,
|
146
|
+
topics: @topics,
|
144
147
|
)
|
145
148
|
|
146
149
|
Protocol.handle_error(response.error_code)
|
@@ -158,6 +161,12 @@ module Kafka
|
|
158
161
|
@member_id = ""
|
159
162
|
sleep 1
|
160
163
|
|
164
|
+
retry
|
165
|
+
rescue CoordinatorLoadInProgress
|
166
|
+
@logger.error "Coordinator broker still loading, retrying in 1s..."
|
167
|
+
|
168
|
+
sleep 1
|
169
|
+
|
161
170
|
retry
|
162
171
|
end
|
163
172
|
|
data/lib/kafka/datadog.rb
CHANGED
@@ -31,7 +31,7 @@ module Kafka
|
|
31
31
|
|
32
32
|
class << self
|
33
33
|
def statsd
|
34
|
-
@statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags)
|
34
|
+
@statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags, socket_path: socket_path)
|
35
35
|
end
|
36
36
|
|
37
37
|
def statsd=(statsd)
|
@@ -40,7 +40,7 @@ module Kafka
|
|
40
40
|
end
|
41
41
|
|
42
42
|
def host
|
43
|
-
@host
|
43
|
+
@host
|
44
44
|
end
|
45
45
|
|
46
46
|
def host=(host)
|
@@ -49,7 +49,7 @@ module Kafka
|
|
49
49
|
end
|
50
50
|
|
51
51
|
def port
|
52
|
-
@port
|
52
|
+
@port
|
53
53
|
end
|
54
54
|
|
55
55
|
def port=(port)
|
@@ -57,6 +57,15 @@ module Kafka
|
|
57
57
|
clear
|
58
58
|
end
|
59
59
|
|
60
|
+
# The socket path handed to ::Datadog::Statsd.new as `socket_path:` when the
# statsd client is built; nil until one has been assigned.
def socket_path
  @socket_path
end
|
63
|
+
|
64
|
+
# Stores a new socket path and then calls `clear`, so the memoized statsd
# client is not reused with the old setting.
#
# @param socket_path the value later passed to ::Datadog::Statsd.new as `socket_path:`.
def socket_path=(socket_path)
  @socket_path = socket_path
  clear
end
|
68
|
+
|
60
69
|
def namespace
|
61
70
|
@namespace ||= STATSD_NAMESPACE
|
62
71
|
end
|
@@ -77,14 +86,6 @@ module Kafka
|
|
77
86
|
|
78
87
|
private
|
79
88
|
|
80
|
-
def default_host
|
81
|
-
::Datadog::Statsd.const_defined?(:Connection) ? ::Datadog::Statsd::Connection::DEFAULT_HOST : ::Datadog::Statsd::DEFAULT_HOST
|
82
|
-
end
|
83
|
-
|
84
|
-
def default_port
|
85
|
-
::Datadog::Statsd.const_defined?(:Connection) ? ::Datadog::Statsd::Connection::DEFAULT_PORT : ::Datadog::Statsd::DEFAULT_PORT
|
86
|
-
end
|
87
|
-
|
88
89
|
def clear
|
89
90
|
@statsd && @statsd.close
|
90
91
|
@statsd = nil
|
@@ -95,8 +96,8 @@ module Kafka
|
|
95
96
|
private
|
96
97
|
|
97
98
|
%w[increment histogram count timing gauge].each do |type|
|
98
|
-
define_method(type) do |*args|
|
99
|
-
emit(type, *args)
|
99
|
+
define_method(type) do |*args, **kwargs|
|
100
|
+
emit(type, *args, **kwargs)
|
100
101
|
end
|
101
102
|
end
|
102
103
|
|
@@ -168,6 +169,8 @@ module Kafka
|
|
168
169
|
def process_batch(event)
|
169
170
|
offset = event.payload.fetch(:last_offset)
|
170
171
|
messages = event.payload.fetch(:message_count)
|
172
|
+
create_time = event.payload.fetch(:last_create_time)
|
173
|
+
time_lag = create_time && ((Time.now - create_time) * 1000).to_i
|
171
174
|
|
172
175
|
tags = {
|
173
176
|
client: event.payload.fetch(:client_id),
|
@@ -184,6 +187,10 @@ module Kafka
|
|
184
187
|
end
|
185
188
|
|
186
189
|
gauge("consumer.offset", offset, tags: tags)
|
190
|
+
|
191
|
+
if time_lag
|
192
|
+
gauge("consumer.time_lag", time_lag, tags: tags)
|
193
|
+
end
|
187
194
|
end
|
188
195
|
|
189
196
|
def fetch_batch(event)
|
data/lib/kafka/fetcher.rb
CHANGED
@@ -4,7 +4,7 @@ require "kafka/fetch_operation"
|
|
4
4
|
|
5
5
|
module Kafka
|
6
6
|
class Fetcher
|
7
|
-
attr_reader :queue
|
7
|
+
attr_reader :queue, :max_wait_time
|
8
8
|
|
9
9
|
def initialize(cluster:, logger:, instrumenter:, max_queue_size:, group:)
|
10
10
|
@cluster = cluster
|
@@ -17,6 +17,9 @@ module Kafka
|
|
17
17
|
@commands = Queue.new
|
18
18
|
@next_offsets = Hash.new { |h, k| h[k] = {} }
|
19
19
|
|
20
|
+
# We are only running when someone calls start.
|
21
|
+
@running = false
|
22
|
+
|
20
23
|
# Long poll until at least this many bytes can be fetched.
|
21
24
|
@min_bytes = 1
|
22
25
|
|
@@ -110,7 +113,7 @@ module Kafka
|
|
110
113
|
elsif @queue.size < @max_queue_size
|
111
114
|
step
|
112
115
|
else
|
113
|
-
@logger.
|
116
|
+
@logger.info "Reached max fetcher queue size (#{@max_queue_size}), sleeping 1s"
|
114
117
|
sleep 1
|
115
118
|
end
|
116
119
|
ensure
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Kafka
  # Holds a list of interceptors that implement `call`
  # and wraps calls to a chain of custom interceptors.
  class Interceptors
    # @param interceptors [Array<#call>, nil] interceptors to run, in order;
    #   nil is treated as an empty list.
    # @param logger the logger to wrap in a TaggedLogger; interceptor failures
    #   are reported to it as warnings.
    def initialize(interceptors:, logger:)
      @interceptors = interceptors || []
      @logger = TaggedLogger.new(logger)
    end

    # This method is called when the client produces a message or once the batches are fetched.
    # The value returned from the first call is passed to the second interceptor call, and so on
    # in an interceptor chain.
    #
    # A StandardError raised by an interceptor is logged as a warning and that interceptor is
    # skipped: the chain continues with the value produced by the last successful interceptor.
    # Non-StandardError exceptions (e.g. Interrupt, SystemExit) are deliberately not swallowed,
    # so signals and process shutdown still propagate.
    #
    # @param intercepted [Kafka::PendingMessage, Kafka::FetchedBatch] the produced message or
    #   fetched batch.
    #
    # @return [Kafka::PendingMessage, Kafka::FetchedBatch] the intercepted message or batch
    #   returned by the last interceptor.
    def call(intercepted)
      @interceptors.each do |interceptor|
        begin
          intercepted = interceptor.call(intercepted)
        rescue StandardError => e
          # backtrace may be nil for exceptions that were never raised normally.
          backtrace = Array(e.backtrace).join("\n")
          @logger.warn "Error executing interceptor for #{describe(intercepted)}: #{e.message}\n#{backtrace}"
        end
      end

      intercepted
    end

    private

    # Builds the "topic: ... partition: ..." part of the warning without assuming the
    # value still looks like a message or batch (an earlier interceptor may have
    # returned something else, such as nil); the rescue handler must never raise itself.
    def describe(intercepted)
      if intercepted.respond_to?(:topic) && intercepted.respond_to?(:partition)
        "topic: #{intercepted.topic} partition: #{intercepted.partition}"
      else
        intercepted.class.to_s
      end
    end
  end
end
|
data/lib/kafka/offset_manager.rb
CHANGED
@@ -50,9 +50,20 @@ module Kafka
|
|
50
50
|
# @param offset [Integer] the offset of the message that should be marked as processed.
|
51
51
|
# @return [nil]
|
52
52
|
def mark_as_processed(topic, partition, offset)
|
53
|
-
@
|
53
|
+
unless @group.assigned_to?(topic, partition)
|
54
|
+
@logger.debug "Not marking #{topic}/#{partition}:#{offset} as processed for partition not assigned to this consumer."
|
55
|
+
return
|
56
|
+
end
|
54
57
|
@processed_offsets[topic] ||= {}
|
55
58
|
|
59
|
+
last_processed_offset = @processed_offsets[topic][partition] || -1
|
60
|
+
if last_processed_offset > offset + 1
|
61
|
+
@logger.debug "Not overwriting newer offset #{topic}/#{partition}:#{last_processed_offset - 1} with older #{offset}"
|
62
|
+
return
|
63
|
+
end
|
64
|
+
|
65
|
+
@uncommitted_offsets += 1
|
66
|
+
|
56
67
|
# The committed offset should always be the offset of the next message that the
|
57
68
|
# application will read, thus adding one to the last message processed.
|
58
69
|
@processed_offsets[topic][partition] = offset + 1
|