ruby-kafka 0.7.10 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +179 -0
- data/.github/workflows/stale.yml +19 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +40 -0
- data/README.md +167 -0
- data/lib/kafka/async_producer.rb +60 -42
- data/lib/kafka/client.rb +92 -6
- data/lib/kafka/cluster.rb +82 -24
- data/lib/kafka/connection.rb +3 -0
- data/lib/kafka/consumer.rb +61 -11
- data/lib/kafka/consumer_group/assignor.rb +63 -0
- data/lib/kafka/consumer_group.rb +29 -6
- data/lib/kafka/crc32_hash.rb +15 -0
- data/lib/kafka/datadog.rb +20 -13
- data/lib/kafka/digest.rb +22 -0
- data/lib/kafka/fetcher.rb +5 -2
- data/lib/kafka/interceptors.rb +33 -0
- data/lib/kafka/murmur2_hash.rb +17 -0
- data/lib/kafka/offset_manager.rb +12 -1
- data/lib/kafka/partitioner.rb +8 -3
- data/lib/kafka/producer.rb +13 -5
- data/lib/kafka/prometheus.rb +78 -79
- data/lib/kafka/protocol/add_offsets_to_txn_response.rb +2 -0
- data/lib/kafka/protocol/encoder.rb +1 -1
- data/lib/kafka/protocol/join_group_request.rb +8 -2
- data/lib/kafka/protocol/join_group_response.rb +9 -1
- data/lib/kafka/protocol/metadata_response.rb +1 -1
- data/lib/kafka/protocol/offset_fetch_request.rb +3 -1
- data/lib/kafka/protocol/record_batch.rb +2 -2
- data/lib/kafka/protocol/sasl_handshake_request.rb +1 -1
- data/lib/kafka/protocol/sync_group_response.rb +5 -2
- data/lib/kafka/protocol/txn_offset_commit_response.rb +34 -5
- data/lib/kafka/round_robin_assignment_strategy.rb +37 -39
- data/lib/kafka/sasl/awsmskiam.rb +133 -0
- data/lib/kafka/sasl_authenticator.rb +15 -2
- data/lib/kafka/ssl_context.rb +6 -5
- data/lib/kafka/tagged_logger.rb +1 -0
- data/lib/kafka/transaction_manager.rb +30 -10
- data/lib/kafka/version.rb +1 -1
- data/ruby-kafka.gemspec +5 -4
- metadata +39 -13
data/lib/kafka/consumer.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require "kafka/consumer_group"
|
4
|
+
require "kafka/interceptors"
|
4
5
|
require "kafka/offset_manager"
|
5
6
|
require "kafka/fetcher"
|
6
7
|
require "kafka/pause"
|
@@ -44,7 +45,8 @@ module Kafka
|
|
44
45
|
#
|
45
46
|
class Consumer
|
46
47
|
|
47
|
-
def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:,
|
48
|
+
def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:,
|
49
|
+
session_timeout:, heartbeat:, refresh_topic_interval: 0, interceptors: [])
|
48
50
|
@cluster = cluster
|
49
51
|
@logger = TaggedLogger.new(logger)
|
50
52
|
@instrumenter = instrumenter
|
@@ -53,6 +55,8 @@ module Kafka
|
|
53
55
|
@session_timeout = session_timeout
|
54
56
|
@fetcher = fetcher
|
55
57
|
@heartbeat = heartbeat
|
58
|
+
@refresh_topic_interval = refresh_topic_interval
|
59
|
+
@interceptors = Interceptors.new(interceptors: interceptors, logger: logger)
|
56
60
|
|
57
61
|
@pauses = Hash.new {|h, k|
|
58
62
|
h[k] = Hash.new {|h2, k2|
|
@@ -73,6 +77,15 @@ module Kafka
|
|
73
77
|
# when user commits message other than last in a batch, this would make ruby-kafka refetch
|
74
78
|
# some already consumed messages
|
75
79
|
@current_offsets = Hash.new { |h, k| h[k] = {} }
|
80
|
+
|
81
|
+
# Map storing subscribed topics with their configuration
|
82
|
+
@subscribed_topics = Hash.new
|
83
|
+
|
84
|
+
# Set storing topics that matched topics in @subscribed_topics
|
85
|
+
@matched_topics = Set.new
|
86
|
+
|
87
|
+
# Whether join_group must be executed again because new topics are added
|
88
|
+
@join_group_for_new_topics = false
|
76
89
|
end
|
77
90
|
|
78
91
|
# Subscribes the consumer to a topic.
|
@@ -97,13 +110,12 @@ module Kafka
|
|
97
110
|
def subscribe(topic_or_regex, default_offset: nil, start_from_beginning: true, max_bytes_per_partition: 1048576)
|
98
111
|
default_offset ||= start_from_beginning ? :earliest : :latest
|
99
112
|
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
end
|
113
|
+
@subscribed_topics[topic_or_regex] = {
|
114
|
+
default_offset: default_offset,
|
115
|
+
start_from_beginning: start_from_beginning,
|
116
|
+
max_bytes_per_partition: max_bytes_per_partition
|
117
|
+
}
|
118
|
+
scan_for_subscribing
|
107
119
|
|
108
120
|
nil
|
109
121
|
end
|
@@ -116,7 +128,6 @@ module Kafka
|
|
116
128
|
def stop
|
117
129
|
@running = false
|
118
130
|
@fetcher.stop
|
119
|
-
@cluster.disconnect
|
120
131
|
end
|
121
132
|
|
122
133
|
# Pause processing of a specific topic partition.
|
@@ -212,6 +223,7 @@ module Kafka
|
|
212
223
|
batches = fetch_batches
|
213
224
|
|
214
225
|
batches.each do |batch|
|
226
|
+
batch = @interceptors.call(batch)
|
215
227
|
batch.messages.each do |message|
|
216
228
|
notification = {
|
217
229
|
topic: message.topic,
|
@@ -303,11 +315,13 @@ module Kafka
|
|
303
315
|
unless batch.empty?
|
304
316
|
raw_messages = batch.messages
|
305
317
|
batch.messages = raw_messages.reject(&:is_control_record)
|
318
|
+
batch = @interceptors.call(batch)
|
306
319
|
|
307
320
|
notification = {
|
308
321
|
topic: batch.topic,
|
309
322
|
partition: batch.partition,
|
310
323
|
last_offset: batch.last_offset,
|
324
|
+
last_create_time: batch.messages.last && batch.messages.last.create_time,
|
311
325
|
offset_lag: batch.offset_lag,
|
312
326
|
highwater_mark_offset: batch.highwater_mark_offset,
|
313
327
|
message_count: batch.messages.count,
|
@@ -401,6 +415,7 @@ module Kafka
|
|
401
415
|
while running?
|
402
416
|
begin
|
403
417
|
@instrumenter.instrument("loop.consumer") do
|
418
|
+
refresh_topic_list_if_enabled
|
404
419
|
yield
|
405
420
|
end
|
406
421
|
rescue HeartbeatError
|
@@ -432,6 +447,7 @@ module Kafka
|
|
432
447
|
# important that members explicitly tell Kafka when they're leaving.
|
433
448
|
make_final_offsets_commit!
|
434
449
|
@group.leave rescue nil
|
450
|
+
@cluster.disconnect
|
435
451
|
@running = false
|
436
452
|
@logger.pop_tags
|
437
453
|
end
|
@@ -452,6 +468,8 @@ module Kafka
|
|
452
468
|
end
|
453
469
|
|
454
470
|
def join_group
|
471
|
+
@join_group_for_new_topics = false
|
472
|
+
|
455
473
|
old_generation_id = @group.generation_id
|
456
474
|
|
457
475
|
@group.join
|
@@ -513,11 +531,19 @@ module Kafka
|
|
513
531
|
end
|
514
532
|
end
|
515
533
|
|
534
|
+
# Re-scans the stored subscriptions when periodic topic refresh is enabled.
#
# Does nothing when `@refresh_topic_interval` is zero or negative, or when the
# previous scan happened less than `@refresh_topic_interval` seconds ago.
# Otherwise runs `scan_for_subscribing` and stores the time of this scan in
# `@refreshed_at`.
def refresh_topic_list_if_enabled
  interval = @refresh_topic_interval
  return if interval <= 0

  # `@refreshed_at` is nil until the first scan, which therefore always runs.
  previous_scan = @refreshed_at
  return if previous_scan && previous_scan + interval > Time.now

  scan_for_subscribing
  @refreshed_at = Time.now
end
|
541
|
+
|
516
542
|
def fetch_batches
|
517
543
|
# Return early if the consumer has been stopped.
|
518
544
|
return [] if shutting_down?
|
519
545
|
|
520
|
-
join_group
|
546
|
+
join_group if !@group.member? || @join_group_for_new_topics
|
521
547
|
|
522
548
|
trigger_heartbeat
|
523
549
|
|
@@ -525,7 +551,7 @@ module Kafka
|
|
525
551
|
|
526
552
|
if !@fetcher.data?
|
527
553
|
@logger.debug "No batches to process"
|
528
|
-
sleep 2
|
554
|
+
sleep(@fetcher.max_wait_time || 2)
|
529
555
|
[]
|
530
556
|
else
|
531
557
|
tag, message = @fetcher.poll
|
@@ -571,10 +597,34 @@ module Kafka
|
|
571
597
|
end
|
572
598
|
end
|
573
599
|
|
600
|
+
# Applies every subscription stored in `@subscribed_topics`.
#
# Each entry maps a topic name or a Regexp to a settings hash that must contain
# `:default_offset`, `:start_from_beginning` and `:max_bytes_per_partition`
# (a missing key raises KeyError). Regexp entries are handed to
# `subscribe_to_regex`; any other key is handed to `subscribe_to_topic`.
def scan_for_subscribing
  @subscribed_topics.each_pair do |subscription, settings|
    offset, from_beginning, max_bytes =
      %i[default_offset start_from_beginning max_bytes_per_partition].map { |key| settings.fetch(key) }

    case subscription
    when Regexp
      subscribe_to_regex(subscription, offset, from_beginning, max_bytes)
    else
      subscribe_to_topic(subscription, offset, from_beginning, max_bytes)
    end
  end
end
|
612
|
+
|
613
|
+
# Subscribes to every topic returned by `cluster_topics` whose name matches
# `topic_regex`, forwarding the remaining arguments to `subscribe_to_topic`.
#
# Returns the list of matching topic names.
def subscribe_to_regex(topic_regex, default_offset, start_from_beginning, max_bytes_per_partition)
  matching_topics = cluster_topics.select { |name| name =~ topic_regex }

  matching_topics.each do |name|
    subscribe_to_topic(name, default_offset, start_from_beginning, max_bytes_per_partition)
  end
end
|
618
|
+
|
574
619
|
# Registers a single topic with the group, offset manager and fetcher.
#
# The first call for a topic adds it to `@matched_topics`, flags
# `@join_group_for_new_topics`, subscribes `@group` and `@fetcher`, stores the
# default offset in `@offset_manager` and marks `@cluster` as stale. Later calls
# for the same topic return nil without touching anything.
#
# `start_from_beginning` is accepted but not used by this method.
def subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
  # Set#add? returns nil when the topic is already a member.
  return unless @matched_topics.add?(topic)

  @join_group_for_new_topics = true

  @group.subscribe(topic)
  @offset_manager.set_default_offset(topic, default_offset)
  @fetcher.subscribe(topic, max_bytes_per_partition: max_bytes_per_partition)
  @cluster.mark_as_stale!
end
|
579
629
|
|
580
630
|
def cluster_topics
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "kafka/protocol/member_assignment"
|
4
|
+
|
5
|
+
module Kafka
  class ConsumerGroup

    # A consumer group partition assignor.
    #
    # Wraps an assignment strategy: it expands the subscribed topics into
    # individual partitions, asks the strategy to distribute them, and converts
    # the answer into one Protocol::MemberAssignment per group member.
    class Assignor
      Partition = Struct.new(:topic, :partition_id)

      # @param cluster [Kafka::Cluster]
      # @param strategy [#call] an object that implements #call and, optionally,
      #   #protocol_name and #user_data.
      def initialize(cluster:, strategy:)
        @cluster = cluster
        @strategy = strategy
      end

      # @return [String] the strategy's #protocol_name when it defines one,
      #   otherwise the strategy's class name.
      def protocol_name
        @strategy.respond_to?(:protocol_name) ? @strategy.protocol_name : @strategy.class.to_s
      end

      # @return [Object, nil] the strategy's #user_data, or nil when the
      #   strategy does not implement it.
      def user_data
        @strategy.user_data if @strategy.respond_to?(:user_data)
      end

      # Assign the topic partitions to the group members.
      #
      # If Kafka::LeaderNotAvailable is raised while assigning, the method waits
      # one second and starts over; there is no attempt limit.
      #
      # @param members [Hash<String, Kafka::Protocol::JoinGroupResponse::Metadata>] a hash
      #   mapping member ids to metadata.
      # @param topics [Array<String>] topics
      # @raise [UnknownTopicOrPartition] when the cluster does not know one of the topics.
      # @return [Hash<String, Kafka::Protocol::MemberAssignment>] a hash mapping member
      #   ids to assignments.
      def assign(members:, topics:)
        topic_partitions = topics.flat_map do |topic|
          begin
            partition_ids = @cluster.partitions_for(topic).map(&:partition_id)
          rescue UnknownTopicOrPartition
            # Re-raise with the topic name so the failure is actionable.
            raise UnknownTopicOrPartition, "unknown topic #{topic}"
          end
          partition_ids.map { |partition_id| Partition.new(topic, partition_id) }
        end

        group_assignment = {}

        # Every member gets an assignment object, even if it ends up empty.
        members.each_key do |member_id|
          group_assignment[member_id] = Protocol::MemberAssignment.new
        end

        @strategy.call(cluster: @cluster, members: members, partitions: topic_partitions).each do |member_id, partitions|
          partition_list(partitions).each do |partition|
            group_assignment[member_id].assign(partition.topic, [partition.partition_id])
          end
        end

        group_assignment
      rescue Kafka::LeaderNotAvailable
        sleep 1
        retry
      end

      private

      # Normalizes a strategy result for one member into an array of partitions.
      #
      # Kernel#Array cannot be used on its own: a single Partition is a Struct,
      # and Array(struct) returns its field values (`[topic, partition_id]`)
      # instead of wrapping the struct.
      def partition_list(partitions)
        partitions.respond_to?(:partition_id) ? [partitions] : Array(partitions)
      end
    end
  end
end
|
data/lib/kafka/consumer_group.rb
CHANGED
@@ -1,24 +1,29 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require "set"
|
4
|
+
require "kafka/consumer_group/assignor"
|
4
5
|
require "kafka/round_robin_assignment_strategy"
|
5
6
|
|
6
7
|
module Kafka
|
7
8
|
class ConsumerGroup
|
8
9
|
attr_reader :assigned_partitions, :generation_id, :group_id
|
9
10
|
|
10
|
-
def initialize(cluster:, logger:, group_id:, session_timeout:, retention_time:, instrumenter:)
|
11
|
+
def initialize(cluster:, logger:, group_id:, session_timeout:, rebalance_timeout:, retention_time:, instrumenter:, assignment_strategy:)
|
11
12
|
@cluster = cluster
|
12
13
|
@logger = TaggedLogger.new(logger)
|
13
14
|
@group_id = group_id
|
14
15
|
@session_timeout = session_timeout
|
16
|
+
@rebalance_timeout = rebalance_timeout
|
15
17
|
@instrumenter = instrumenter
|
16
18
|
@member_id = ""
|
17
19
|
@generation_id = nil
|
18
20
|
@members = {}
|
19
21
|
@topics = Set.new
|
20
22
|
@assigned_partitions = {}
|
21
|
-
@
|
23
|
+
@assignor = Assignor.new(
|
24
|
+
cluster: cluster,
|
25
|
+
strategy: assignment_strategy || RoundRobinAssignmentStrategy.new
|
26
|
+
)
|
22
27
|
@retention_time = retention_time
|
23
28
|
end
|
24
29
|
|
@@ -112,9 +117,12 @@ module Kafka
|
|
112
117
|
|
113
118
|
Protocol.handle_error(response.error_code)
|
114
119
|
end
|
115
|
-
rescue ConnectionError, UnknownMemberId,
|
120
|
+
rescue ConnectionError, UnknownMemberId, IllegalGeneration => e
|
116
121
|
@logger.error "Error sending heartbeat: #{e}"
|
117
122
|
raise HeartbeatError, e
|
123
|
+
rescue RebalanceInProgress => e
|
124
|
+
@logger.warn "Error sending heartbeat: #{e}"
|
125
|
+
raise HeartbeatError, e
|
118
126
|
rescue NotCoordinatorForGroup
|
119
127
|
@logger.error "Failed to find coordinator for group `#{@group_id}`; retrying..."
|
120
128
|
sleep 1
|
@@ -140,7 +148,11 @@ module Kafka
|
|
140
148
|
response = coordinator.join_group(
|
141
149
|
group_id: @group_id,
|
142
150
|
session_timeout: @session_timeout,
|
151
|
+
rebalance_timeout: @rebalance_timeout,
|
143
152
|
member_id: @member_id,
|
153
|
+
topics: @topics,
|
154
|
+
protocol_name: @assignor.protocol_name,
|
155
|
+
user_data: @assignor.user_data,
|
144
156
|
)
|
145
157
|
|
146
158
|
Protocol.handle_error(response.error_code)
|
@@ -158,6 +170,12 @@ module Kafka
|
|
158
170
|
@member_id = ""
|
159
171
|
sleep 1
|
160
172
|
|
173
|
+
retry
|
174
|
+
rescue CoordinatorLoadInProgress
|
175
|
+
@logger.error "Coordinator broker still loading, retrying in 1s..."
|
176
|
+
|
177
|
+
sleep 1
|
178
|
+
|
161
179
|
retry
|
162
180
|
end
|
163
181
|
|
@@ -171,9 +189,14 @@ module Kafka
|
|
171
189
|
if group_leader?
|
172
190
|
@logger.info "Chosen as leader of group `#{@group_id}`"
|
173
191
|
|
174
|
-
|
175
|
-
|
176
|
-
topics
|
192
|
+
topics = Set.new
|
193
|
+
@members.each do |_member, metadata|
|
194
|
+
metadata.topics.each { |t| topics.add(t) }
|
195
|
+
end
|
196
|
+
|
197
|
+
group_assignment = @assignor.assign(
|
198
|
+
members: @members,
|
199
|
+
topics: topics,
|
177
200
|
)
|
178
201
|
end
|
179
202
|
|
data/lib/kafka/datadog.rb
CHANGED
@@ -31,7 +31,7 @@ module Kafka
|
|
31
31
|
|
32
32
|
class << self
|
33
33
|
def statsd
|
34
|
-
@statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags)
|
34
|
+
@statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags, socket_path: socket_path)
|
35
35
|
end
|
36
36
|
|
37
37
|
def statsd=(statsd)
|
@@ -40,7 +40,7 @@ module Kafka
|
|
40
40
|
end
|
41
41
|
|
42
42
|
def host
|
43
|
-
@host
|
43
|
+
@host
|
44
44
|
end
|
45
45
|
|
46
46
|
def host=(host)
|
@@ -49,7 +49,7 @@ module Kafka
|
|
49
49
|
end
|
50
50
|
|
51
51
|
def port
|
52
|
-
@port
|
52
|
+
@port
|
53
53
|
end
|
54
54
|
|
55
55
|
def port=(port)
|
@@ -57,6 +57,15 @@ module Kafka
|
|
57
57
|
clear
|
58
58
|
end
|
59
59
|
|
60
|
+
def socket_path
|
61
|
+
@socket_path
|
62
|
+
end
|
63
|
+
|
64
|
+
def socket_path=(socket_path)
|
65
|
+
@socket_path = socket_path
|
66
|
+
clear
|
67
|
+
end
|
68
|
+
|
60
69
|
def namespace
|
61
70
|
@namespace ||= STATSD_NAMESPACE
|
62
71
|
end
|
@@ -77,14 +86,6 @@ module Kafka
|
|
77
86
|
|
78
87
|
private
|
79
88
|
|
80
|
-
def default_host
|
81
|
-
::Datadog::Statsd.const_defined?(:Connection) ? ::Datadog::Statsd::Connection::DEFAULT_HOST : ::Datadog::Statsd::DEFAULT_HOST
|
82
|
-
end
|
83
|
-
|
84
|
-
def default_port
|
85
|
-
::Datadog::Statsd.const_defined?(:Connection) ? ::Datadog::Statsd::Connection::DEFAULT_PORT : ::Datadog::Statsd::DEFAULT_PORT
|
86
|
-
end
|
87
|
-
|
88
89
|
def clear
|
89
90
|
@statsd && @statsd.close
|
90
91
|
@statsd = nil
|
@@ -95,8 +96,8 @@ module Kafka
|
|
95
96
|
private
|
96
97
|
|
97
98
|
%w[increment histogram count timing gauge].each do |type|
|
98
|
-
define_method(type) do |*args|
|
99
|
-
emit(type, *args)
|
99
|
+
define_method(type) do |*args, **kwargs|
|
100
|
+
emit(type, *args, **kwargs)
|
100
101
|
end
|
101
102
|
end
|
102
103
|
|
@@ -168,6 +169,8 @@ module Kafka
|
|
168
169
|
def process_batch(event)
|
169
170
|
offset = event.payload.fetch(:last_offset)
|
170
171
|
messages = event.payload.fetch(:message_count)
|
172
|
+
create_time = event.payload.fetch(:last_create_time)
|
173
|
+
time_lag = create_time && ((Time.now - create_time) * 1000).to_i
|
171
174
|
|
172
175
|
tags = {
|
173
176
|
client: event.payload.fetch(:client_id),
|
@@ -184,6 +187,10 @@ module Kafka
|
|
184
187
|
end
|
185
188
|
|
186
189
|
gauge("consumer.offset", offset, tags: tags)
|
190
|
+
|
191
|
+
if time_lag
|
192
|
+
gauge("consumer.time_lag", time_lag, tags: tags)
|
193
|
+
end
|
187
194
|
end
|
188
195
|
|
189
196
|
def fetch_batch(event)
|
data/lib/kafka/digest.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "kafka/crc32_hash"
|
4
|
+
require "kafka/murmur2_hash"
|
5
|
+
|
6
|
+
module Kafka
  # Registry of the hash functions available for partitioning.
  module Digest
    # Maps a hash function name to the object implementing it.
    FUNCTIONS_BY_NAME = {
      crc32: Crc32Hash.new,
      murmur2: Murmur2Hash.new
    }.freeze

    # Looks up a hash function by name and calls its #load before returning it.
    #
    # @param name [Symbol] a key of FUNCTIONS_BY_NAME.
    # @raise [LoadError] when no function is registered under `name`; #load may
    #   raise as well.
    # @return [Object] the registered hash function object.
    def self.find_digest(name)
      FUNCTIONS_BY_NAME
        .fetch(name) { raise LoadError, "Unknown hash function #{name}" }
        .tap { |digest| digest.load }
    end
  end
end
|
data/lib/kafka/fetcher.rb
CHANGED
@@ -4,7 +4,7 @@ require "kafka/fetch_operation"
|
|
4
4
|
|
5
5
|
module Kafka
|
6
6
|
class Fetcher
|
7
|
-
attr_reader :queue
|
7
|
+
attr_reader :queue, :max_wait_time
|
8
8
|
|
9
9
|
def initialize(cluster:, logger:, instrumenter:, max_queue_size:, group:)
|
10
10
|
@cluster = cluster
|
@@ -17,6 +17,9 @@ module Kafka
|
|
17
17
|
@commands = Queue.new
|
18
18
|
@next_offsets = Hash.new { |h, k| h[k] = {} }
|
19
19
|
|
20
|
+
# We are only running when someone calls start.
|
21
|
+
@running = false
|
22
|
+
|
20
23
|
# Long poll until at least this many bytes can be fetched.
|
21
24
|
@min_bytes = 1
|
22
25
|
|
@@ -110,7 +113,7 @@ module Kafka
|
|
110
113
|
elsif @queue.size < @max_queue_size
|
111
114
|
step
|
112
115
|
else
|
113
|
-
@logger.
|
116
|
+
@logger.info "Reached max fetcher queue size (#{@max_queue_size}), sleeping 1s"
|
114
117
|
sleep 1
|
115
118
|
end
|
116
119
|
ensure
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Kafka
  # Holds a list of interceptors that implement `call`
  # and wraps calls to a chain of custom interceptors.
  class Interceptors
    # @param interceptors [Array<#call>, nil] the interceptor chain; nil is treated
    #   as an empty chain.
    # @param logger [Object] wrapped in a TaggedLogger and used to report
    #   interceptor failures.
    def initialize(interceptors:, logger:)
      @interceptors = interceptors || []
      @logger = TaggedLogger.new(logger)
    end

    # This method is called when the client produces a message or once the batches are fetched.
    # The message returned from the first call is passed to the second interceptor call, and so on in an
    # interceptor chain.
    #
    # An interceptor that raises a StandardError or ScriptError is logged and skipped;
    # the value it was given is passed on to the next interceptor unchanged.
    # Process-level exceptions (e.g. Interrupt, SystemExit, NoMemoryError) are
    # deliberately not rescued, so a failing interceptor cannot block shutdown.
    #
    # @param intercepted [Kafka::PendingMessage, Kafka::FetchedBatch] the produced message or
    #   fetched batch.
    #
    # @return [Kafka::PendingMessage, Kafka::FetchedBatch] the intercepted message or batch
    #   returned by the last interceptor.
    def call(intercepted)
      @interceptors.each do |interceptor|
        begin
          intercepted = interceptor.call(intercepted)
        rescue StandardError, ScriptError => e
          # Array() guards against exceptions that carry no backtrace.
          @logger.warn "Error executing interceptor for topic: #{intercepted.topic} partition: #{intercepted.partition}: #{e.message}\n#{Array(e.backtrace).join("\n")}"
        end
      end

      intercepted
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Kafka
  # Murmur2 hash function backed by the optional `digest-murmurhash` gem.
  class Murmur2Hash
    # Seed handed to MurmurHash2, packed as a native-endian unsigned 32-bit value.
    SEED = [0x9747b28c].pack('L')

    # Loads the murmurhash extension.
    #
    # @raise [LoadError] with installation advice when the gem is not available.
    def load
      begin
        require 'digest/murmurhash'
      rescue LoadError
        raise LoadError, "using murmur2 hashing requires adding a dependency on the `digest-murmurhash` gem to your Gemfile."
      end
    end

    # @param value [String] the data to hash.
    # @return [Integer] the raw MurmurHash2 digest with the top bit masked off.
    def hash(value)
      raw_digest = ::Digest::MurmurHash2.rawdigest(value, SEED)
      raw_digest & 0x7fffffff
    end
  end
end
|
data/lib/kafka/offset_manager.rb
CHANGED
@@ -50,9 +50,20 @@ module Kafka
|
|
50
50
|
# @param offset [Integer] the offset of the message that should be marked as processed.
|
51
51
|
# @return [nil]
|
52
52
|
def mark_as_processed(topic, partition, offset)
|
53
|
-
@
|
53
|
+
unless @group.assigned_to?(topic, partition)
|
54
|
+
@logger.debug "Not marking #{topic}/#{partition}:#{offset} as processed for partition not assigned to this consumer."
|
55
|
+
return
|
56
|
+
end
|
54
57
|
@processed_offsets[topic] ||= {}
|
55
58
|
|
59
|
+
last_processed_offset = @processed_offsets[topic][partition] || -1
|
60
|
+
if last_processed_offset > offset + 1
|
61
|
+
@logger.debug "Not overwriting newer offset #{topic}/#{partition}:#{last_processed_offset - 1} with older #{offset}"
|
62
|
+
return
|
63
|
+
end
|
64
|
+
|
65
|
+
@uncommitted_offsets += 1
|
66
|
+
|
56
67
|
# The committed offset should always be the offset of the next message that the
|
57
68
|
# application will read, thus adding one to the last message processed.
|
58
69
|
@processed_offsets[topic][partition] = offset + 1
|
data/lib/kafka/partitioner.rb
CHANGED
@@ -1,11 +1,16 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "
|
3
|
+
require "kafka/digest"
|
4
4
|
|
5
5
|
module Kafka
|
6
6
|
|
7
7
|
# Assigns partitions to messages.
|
8
8
|
class Partitioner
|
9
|
+
# @param hash_function [Symbol, nil] the algorithm used to compute a message's
#   destination partition. Falls back to :crc32 when not given.
def initialize(hash_function: nil)
  algorithm = hash_function || :crc32
  @digest = Digest.find_digest(algorithm)
end
|
9
14
|
|
10
15
|
# Assigns a partition number based on a partition key. If no explicit
|
11
16
|
# partition key is provided, the message key will be used instead.
|
@@ -19,7 +24,7 @@ module Kafka
|
|
19
24
|
# @param message [Kafka::PendingMessage] the message that should be assigned
|
20
25
|
# a partition.
|
21
26
|
# @return [Integer] the partition number.
|
22
|
-
def
|
27
|
+
def call(partition_count, message)
|
23
28
|
raise ArgumentError if partition_count == 0
|
24
29
|
|
25
30
|
# If no explicit partition key is specified we use the message key instead.
|
@@ -28,7 +33,7 @@ module Kafka
|
|
28
33
|
if key.nil?
|
29
34
|
rand(partition_count)
|
30
35
|
else
|
31
|
-
|
36
|
+
@digest.hash(key) % partition_count
|
32
37
|
end
|
33
38
|
end
|
34
39
|
end
|
data/lib/kafka/producer.rb
CHANGED
@@ -7,6 +7,7 @@ require "kafka/produce_operation"
|
|
7
7
|
require "kafka/pending_message_queue"
|
8
8
|
require "kafka/pending_message"
|
9
9
|
require "kafka/compressor"
|
10
|
+
require "kafka/interceptors"
|
10
11
|
|
11
12
|
module Kafka
|
12
13
|
# Allows sending messages to a Kafka cluster.
|
@@ -129,7 +130,9 @@ module Kafka
|
|
129
130
|
class Producer
|
130
131
|
class AbortTransaction < StandardError; end
|
131
132
|
|
132
|
-
def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:,
|
133
|
+
def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:,
|
134
|
+
required_acks:, max_retries:, retry_backoff:, max_buffer_size:,
|
135
|
+
max_buffer_bytesize:, partitioner:, interceptors: [])
|
133
136
|
@cluster = cluster
|
134
137
|
@transaction_manager = transaction_manager
|
135
138
|
@logger = TaggedLogger.new(logger)
|
@@ -141,6 +144,8 @@ module Kafka
|
|
141
144
|
@max_buffer_size = max_buffer_size
|
142
145
|
@max_buffer_bytesize = max_buffer_bytesize
|
143
146
|
@compressor = compressor
|
147
|
+
@partitioner = partitioner
|
148
|
+
@interceptors = Interceptors.new(interceptors: interceptors, logger: logger)
|
144
149
|
|
145
150
|
# The set of topics that are produced to.
|
146
151
|
@target_topics = Set.new
|
@@ -188,15 +193,18 @@ module Kafka
|
|
188
193
|
# @raise [BufferOverflow] if the maximum buffer size has been reached.
|
189
194
|
# @return [nil]
|
190
195
|
def produce(value, key: nil, headers: {}, topic:, partition: nil, partition_key: nil, create_time: Time.now)
|
191
|
-
|
196
|
+
# We want to fail fast if `topic` isn't a String
|
197
|
+
topic = topic.to_str
|
198
|
+
|
199
|
+
message = @interceptors.call(PendingMessage.new(
|
192
200
|
value: value && value.to_s,
|
193
201
|
key: key && key.to_s,
|
194
202
|
headers: headers,
|
195
|
-
topic: topic
|
203
|
+
topic: topic,
|
196
204
|
partition: partition && Integer(partition),
|
197
205
|
partition_key: partition_key && partition_key.to_s,
|
198
206
|
create_time: create_time
|
199
|
-
)
|
207
|
+
))
|
200
208
|
|
201
209
|
if buffer_size >= @max_buffer_size
|
202
210
|
buffer_overflow topic,
|
@@ -452,7 +460,7 @@ module Kafka
|
|
452
460
|
|
453
461
|
if partition.nil?
|
454
462
|
partition_count = @cluster.partitions_for(message.topic).count
|
455
|
-
partition =
|
463
|
+
partition = @partitioner.call(partition_count, message)
|
456
464
|
end
|
457
465
|
|
458
466
|
@buffer.write(
|