ruby-kafka 0.7.10 → 1.5.0
This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +179 -0
- data/.github/workflows/stale.yml +19 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +40 -0
- data/README.md +167 -0
- data/lib/kafka/async_producer.rb +60 -42
- data/lib/kafka/client.rb +92 -6
- data/lib/kafka/cluster.rb +82 -24
- data/lib/kafka/connection.rb +3 -0
- data/lib/kafka/consumer.rb +61 -11
- data/lib/kafka/consumer_group/assignor.rb +63 -0
- data/lib/kafka/consumer_group.rb +29 -6
- data/lib/kafka/crc32_hash.rb +15 -0
- data/lib/kafka/datadog.rb +20 -13
- data/lib/kafka/digest.rb +22 -0
- data/lib/kafka/fetcher.rb +5 -2
- data/lib/kafka/interceptors.rb +33 -0
- data/lib/kafka/murmur2_hash.rb +17 -0
- data/lib/kafka/offset_manager.rb +12 -1
- data/lib/kafka/partitioner.rb +8 -3
- data/lib/kafka/producer.rb +13 -5
- data/lib/kafka/prometheus.rb +78 -79
- data/lib/kafka/protocol/add_offsets_to_txn_response.rb +2 -0
- data/lib/kafka/protocol/encoder.rb +1 -1
- data/lib/kafka/protocol/join_group_request.rb +8 -2
- data/lib/kafka/protocol/join_group_response.rb +9 -1
- data/lib/kafka/protocol/metadata_response.rb +1 -1
- data/lib/kafka/protocol/offset_fetch_request.rb +3 -1
- data/lib/kafka/protocol/record_batch.rb +2 -2
- data/lib/kafka/protocol/sasl_handshake_request.rb +1 -1
- data/lib/kafka/protocol/sync_group_response.rb +5 -2
- data/lib/kafka/protocol/txn_offset_commit_response.rb +34 -5
- data/lib/kafka/round_robin_assignment_strategy.rb +37 -39
- data/lib/kafka/sasl/awsmskiam.rb +133 -0
- data/lib/kafka/sasl_authenticator.rb +15 -2
- data/lib/kafka/ssl_context.rb +6 -5
- data/lib/kafka/tagged_logger.rb +1 -0
- data/lib/kafka/transaction_manager.rb +30 -10
- data/lib/kafka/version.rb +1 -1
- data/ruby-kafka.gemspec +5 -4
- metadata +39 -13
data/lib/kafka/consumer.rb
CHANGED
@@ -1,6 +1,7 @@
 # frozen_string_literal: true

 require "kafka/consumer_group"
+require "kafka/interceptors"
 require "kafka/offset_manager"
 require "kafka/fetcher"
 require "kafka/pause"
@@ -44,7 +45,8 @@ module Kafka
   #
   class Consumer

-    def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:, session_timeout:, heartbeat:)
+    def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:,
+                   session_timeout:, heartbeat:, refresh_topic_interval: 0, interceptors: [])
       @cluster = cluster
       @logger = TaggedLogger.new(logger)
       @instrumenter = instrumenter
@@ -53,6 +55,8 @@ module Kafka
       @session_timeout = session_timeout
       @fetcher = fetcher
       @heartbeat = heartbeat
+      @refresh_topic_interval = refresh_topic_interval
+      @interceptors = Interceptors.new(interceptors: interceptors, logger: logger)

       @pauses = Hash.new {|h, k|
         h[k] = Hash.new {|h2, k2|
@@ -73,6 +77,15 @@ module Kafka
       # when user commits message other than last in a batch, this would make ruby-kafka refetch
       # some already consumed messages
       @current_offsets = Hash.new { |h, k| h[k] = {} }
+
+      # Map storing subscribed topics with their configuration
+      @subscribed_topics = Hash.new
+
+      # Set storing topics that matched topics in @subscribed_topics
+      @matched_topics = Set.new
+
+      # Whether join_group must be executed again because new topics are added
+      @join_group_for_new_topics = false
     end

     # Subscribes the consumer to a topic.
@@ -97,13 +110,12 @@ module Kafka
     def subscribe(topic_or_regex, default_offset: nil, start_from_beginning: true, max_bytes_per_partition: 1048576)
       default_offset ||= start_from_beginning ? :earliest : :latest

-      if topic_or_regex.is_a?(Regexp)
-        cluster_topics.select { |topic| topic =~ topic_or_regex }.each do |topic|
-          subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
-        end
-      else
-        subscribe_to_topic(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
-      end
+      @subscribed_topics[topic_or_regex] = {
+        default_offset: default_offset,
+        start_from_beginning: start_from_beginning,
+        max_bytes_per_partition: max_bytes_per_partition
+      }
+      scan_for_subscribing

       nil
     end
@@ -116,7 +128,6 @@ module Kafka
     def stop
       @running = false
       @fetcher.stop
-      @cluster.disconnect
     end

     # Pause processing of a specific topic partition.
@@ -212,6 +223,7 @@ module Kafka
       batches = fetch_batches

       batches.each do |batch|
+        batch = @interceptors.call(batch)
         batch.messages.each do |message|
           notification = {
             topic: message.topic,
@@ -303,11 +315,13 @@ module Kafka
         unless batch.empty?
           raw_messages = batch.messages
           batch.messages = raw_messages.reject(&:is_control_record)
+          batch = @interceptors.call(batch)

           notification = {
             topic: batch.topic,
             partition: batch.partition,
             last_offset: batch.last_offset,
+            last_create_time: batch.messages.last && batch.messages.last.create_time,
             offset_lag: batch.offset_lag,
             highwater_mark_offset: batch.highwater_mark_offset,
             message_count: batch.messages.count,
@@ -401,6 +415,7 @@ module Kafka
       while running?
         begin
           @instrumenter.instrument("loop.consumer") do
+            refresh_topic_list_if_enabled
             yield
           end
         rescue HeartbeatError
@@ -432,6 +447,7 @@ module Kafka
       # important that members explicitly tell Kafka when they're leaving.
       make_final_offsets_commit!
       @group.leave rescue nil
+      @cluster.disconnect
       @running = false
       @logger.pop_tags
     end
@@ -452,6 +468,8 @@ module Kafka
     end

     def join_group
+      @join_group_for_new_topics = false
+
       old_generation_id = @group.generation_id

       @group.join
@@ -513,11 +531,19 @@ module Kafka
       end
     end

+    def refresh_topic_list_if_enabled
+      return if @refresh_topic_interval <= 0
+      return if @refreshed_at && @refreshed_at + @refresh_topic_interval > Time.now
+
+      scan_for_subscribing
+      @refreshed_at = Time.now
+    end
+
     def fetch_batches
       # Return early if the consumer has been stopped.
       return [] if shutting_down?

-      join_group
+      join_group if !@group.member? || @join_group_for_new_topics

       trigger_heartbeat

@@ -525,7 +551,7 @@ module Kafka

       if !@fetcher.data?
         @logger.debug "No batches to process"
-        sleep 2
+        sleep(@fetcher.max_wait_time || 2)
         []
       else
         tag, message = @fetcher.poll
@@ -571,10 +597,34 @@ module Kafka
         end
       end

+    def scan_for_subscribing
+      @subscribed_topics.each do |topic_or_regex, config|
+        default_offset = config.fetch(:default_offset)
+        start_from_beginning = config.fetch(:start_from_beginning)
+        max_bytes_per_partition = config.fetch(:max_bytes_per_partition)
+        if topic_or_regex.is_a?(Regexp)
+          subscribe_to_regex(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+        else
+          subscribe_to_topic(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+        end
+      end
+    end
+
+    def subscribe_to_regex(topic_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+      cluster_topics.select { |topic| topic =~ topic_regex }.each do |topic|
+        subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
+      end
+    end
+
     def subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
+      return if @matched_topics.include?(topic)
+      @matched_topics.add(topic)
+      @join_group_for_new_topics = true
+
       @group.subscribe(topic)
       @offset_manager.set_default_offset(topic, default_offset)
       @fetcher.subscribe(topic, max_bytes_per_partition: max_bytes_per_partition)
+      @cluster.mark_as_stale!
     end

     def cluster_topics
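Taken together, the @subscribed_topics bookkeeping and refresh_topic_list_if_enabled give the consumer dynamic topic subscription: a regex subscription can pick up topics created after the consumer started. A minimal usage sketch, assuming the refresh_topic_interval: keyword is exposed on Client#consumer as in the 1.x README (broker address and topic pattern are placeholders):

require "kafka"

kafka = Kafka.new(["kafka1:9092"], client_id: "my-app")

# Rescan cluster topics every 30 seconds; topics matching the regex that
# appear later are subscribed to without restarting the consumer.
consumer = kafka.consumer(group_id: "my-group", refresh_topic_interval: 30)
consumer.subscribe(/events-.*/)

consumer.each_message do |message|
  puts "#{message.topic}/#{message.partition}@#{message.offset}: #{message.value}"
end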
data/lib/kafka/consumer_group/assignor.rb
ADDED
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+require "kafka/protocol/member_assignment"
+
+module Kafka
+  class ConsumerGroup
+
+    # A consumer group partition assignor
+    class Assignor
+      Partition = Struct.new(:topic, :partition_id)
+
+      # @param cluster [Kafka::Cluster]
+      # @param strategy [Object] an object that implements #protocol_type,
+      #   #user_data, and #assign.
+      def initialize(cluster:, strategy:)
+        @cluster = cluster
+        @strategy = strategy
+      end
+
+      def protocol_name
+        @strategy.respond_to?(:protocol_name) ? @strategy.protocol_name : @strategy.class.to_s
+      end
+
+      def user_data
+        @strategy.user_data if @strategy.respond_to?(:user_data)
+      end
+
+      # Assign the topic partitions to the group members.
+      #
+      # @param members [Hash<String, Kafka::Protocol::JoinGroupResponse::Metadata>] a hash
+      #   mapping member ids to metadata.
+      # @param topics [Array<String>] topics
+      # @return [Hash<String, Kafka::Protocol::MemberAssignment>] a hash mapping member
+      #   ids to assignments.
+      def assign(members:, topics:)
+        topic_partitions = topics.flat_map do |topic|
+          begin
+            partition_ids = @cluster.partitions_for(topic).map(&:partition_id)
+          rescue UnknownTopicOrPartition
+            raise UnknownTopicOrPartition, "unknown topic #{topic}"
+          end
+          partition_ids.map {|partition_id| Partition.new(topic, partition_id) }
+        end
+
+        group_assignment = {}
+
+        members.each_key do |member_id|
+          group_assignment[member_id] = Protocol::MemberAssignment.new
+        end
+        @strategy.call(cluster: @cluster, members: members, partitions: topic_partitions).each do |member_id, partitions|
+          Array(partitions).each do |partition|
+            group_assignment[member_id].assign(partition.topic, [partition.partition_id])
+          end
+        end
+
+        group_assignment
+      rescue Kafka::LeaderNotAvailable
+        sleep 1
+        retry
+      end
+    end
+  end
+end
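The Assignor decouples group bookkeeping from the assignment algorithm: any object responding to call(cluster:, members:, partitions:) and returning a hash of member id to partitions can serve as a strategy, with #protocol_name and #user_data optional. A hypothetical strategy sketch (class name and modulo-based placement are invented for illustration):

class StickyByHashStrategy
  def protocol_name
    "sticky_by_hash"
  end

  # members:    hash mapping member id => JoinGroupResponse::Metadata
  # partitions: array of objects responding to #topic and #partition_id
  def call(cluster:, members:, partitions:)
    member_ids = members.keys.sort
    partitions.group_by do |partition|
      # Deterministic placement: a given partition always maps to the
      # same member for a fixed group membership.
      member_ids[partition.partition_id % member_ids.size]
    end
  end
end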
data/lib/kafka/consumer_group.rb
CHANGED
@@ -1,24 +1,29 @@
 # frozen_string_literal: true

 require "set"
+require "kafka/consumer_group/assignor"
 require "kafka/round_robin_assignment_strategy"

 module Kafka
   class ConsumerGroup
     attr_reader :assigned_partitions, :generation_id, :group_id

-    def initialize(cluster:, logger:, group_id:, session_timeout:, retention_time:, instrumenter:)
+    def initialize(cluster:, logger:, group_id:, session_timeout:, rebalance_timeout:, retention_time:, instrumenter:, assignment_strategy:)
       @cluster = cluster
       @logger = TaggedLogger.new(logger)
       @group_id = group_id
       @session_timeout = session_timeout
+      @rebalance_timeout = rebalance_timeout
       @instrumenter = instrumenter
       @member_id = ""
       @generation_id = nil
       @members = {}
       @topics = Set.new
       @assigned_partitions = {}
-      @assignment_strategy = RoundRobinAssignmentStrategy.new(cluster: @cluster)
+      @assignor = Assignor.new(
+        cluster: cluster,
+        strategy: assignment_strategy || RoundRobinAssignmentStrategy.new
+      )
       @retention_time = retention_time
     end
@@ -112,9 +117,12 @@ module Kafka

         Protocol.handle_error(response.error_code)
       end
-    rescue ConnectionError, UnknownMemberId, RebalanceInProgress, IllegalGeneration => e
+    rescue ConnectionError, UnknownMemberId, IllegalGeneration => e
       @logger.error "Error sending heartbeat: #{e}"
       raise HeartbeatError, e
+    rescue RebalanceInProgress => e
+      @logger.warn "Error sending heartbeat: #{e}"
+      raise HeartbeatError, e
     rescue NotCoordinatorForGroup
       @logger.error "Failed to find coordinator for group `#{@group_id}`; retrying..."
       sleep 1
@@ -140,7 +148,11 @@ module Kafka
       response = coordinator.join_group(
         group_id: @group_id,
         session_timeout: @session_timeout,
+        rebalance_timeout: @rebalance_timeout,
         member_id: @member_id,
+        topics: @topics,
+        protocol_name: @assignor.protocol_name,
+        user_data: @assignor.user_data,
       )

       Protocol.handle_error(response.error_code)
@@ -158,6 +170,12 @@ module Kafka
       @member_id = ""
       sleep 1

+      retry
+    rescue CoordinatorLoadInProgress
+      @logger.error "Coordinator broker still loading, retrying in 1s..."
+
+      sleep 1
+
       retry
     end

@@ -171,9 +189,14 @@ module Kafka
       if group_leader?
         @logger.info "Chosen as leader of group `#{@group_id}`"

-        group_assignment = @assignment_strategy.assign(
-          members: @members.keys,
-          topics: @topics,
+        topics = Set.new
+        @members.each do |_member, metadata|
+          metadata.topics.each { |t| topics.add(t) }
+        end
+
+        group_assignment = @assignor.assign(
+          members: @members,
+          topics: topics,
         )
       end

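Assuming the assignment_strategy: keyword is plumbed through Client#consumer (as the 1.x README documents), a custom strategy such as the sketch above would be wired up roughly like this:

kafka = Kafka.new(["kafka1:9092"], client_id: "my-app")
consumer = kafka.consumer(
  group_id: "my-group",
  assignment_strategy: StickyByHashStrategy.new
)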
data/lib/kafka/datadog.rb
CHANGED
@@ -31,7 +31,7 @@ module Kafka

     class << self
       def statsd
-        @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags)
+        @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags, socket_path: socket_path)
       end

       def statsd=(statsd)
@@ -40,7 +40,7 @@ module Kafka
       end

       def host
-        @host ||= default_host
+        @host
       end

       def host=(host)
@@ -49,7 +49,7 @@ module Kafka
       end

       def port
-        @port ||= default_port
+        @port
       end

       def port=(port)
@@ -57,6 +57,15 @@ module Kafka
         clear
       end

+      def socket_path
+        @socket_path
+      end
+
+      def socket_path=(socket_path)
+        @socket_path = socket_path
+        clear
+      end
+
       def namespace
         @namespace ||= STATSD_NAMESPACE
       end
@@ -77,14 +86,6 @@ module Kafka

       private

-      def default_host
-        ::Datadog::Statsd.const_defined?(:Connection) ? ::Datadog::Statsd::Connection::DEFAULT_HOST : ::Datadog::Statsd::DEFAULT_HOST
-      end
-
-      def default_port
-        ::Datadog::Statsd.const_defined?(:Connection) ? ::Datadog::Statsd::Connection::DEFAULT_PORT : ::Datadog::Statsd::DEFAULT_PORT
-      end
-
       def clear
         @statsd && @statsd.close
         @statsd = nil
@@ -95,8 +96,8 @@ module Kafka
       private

       %w[increment histogram count timing gauge].each do |type|
-        define_method(type) do |*args|
-          emit(type, *args)
+        define_method(type) do |*args, **kwargs|
+          emit(type, *args, **kwargs)
         end
       end

@@ -168,6 +169,8 @@ module Kafka
       def process_batch(event)
         offset = event.payload.fetch(:last_offset)
         messages = event.payload.fetch(:message_count)
+        create_time = event.payload.fetch(:last_create_time)
+        time_lag = create_time && ((Time.now - create_time) * 1000).to_i

         tags = {
           client: event.payload.fetch(:client_id),
@@ -184,6 +187,10 @@ module Kafka
         end

         gauge("consumer.offset", offset, tags: tags)
+
+        if time_lag
+          gauge("consumer.time_lag", time_lag, tags: tags)
+        end
       end

       def fetch_batch(event)
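The new socket_path accessor lets metrics flow over the DogStatsD Unix domain socket instead of UDP. A configuration sketch (the socket path below is the Datadog agent's conventional default, not something this gem sets for you):

require "kafka/datadog"

# UDP transport...
Kafka::Datadog.host = "localhost"
Kafka::Datadog.port = 8125

# ...or a Unix domain socket, which the statsd client uses instead of
# host/port when given:
Kafka::Datadog.socket_path = "/var/run/datadog/dsd.socket"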
data/lib/kafka/digest.rb
ADDED
@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+
+require "kafka/crc32_hash"
+require "kafka/murmur2_hash"
+
+module Kafka
+  module Digest
+    FUNCTIONS_BY_NAME = {
+      :crc32 => Crc32Hash.new,
+      :murmur2 => Murmur2Hash.new
+    }.freeze
+
+    def self.find_digest(name)
+      digest = FUNCTIONS_BY_NAME.fetch(name) do
+        raise LoadError, "Unknown hash function #{name}"
+      end
+
+      digest.load
+      digest
+    end
+  end
+end
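Kafka::Digest is a small registry: find_digest looks the implementation up by symbol, calls #load so any optional dependency is required up front, and returns the digest object. For example:

require "kafka"

crc = Kafka::Digest.find_digest(:crc32)
crc.hash("some-key") % 6      # partition index for a 6-partition topic

# Raises LoadError unless the digest-murmurhash gem is available:
murmur = Kafka::Digest.find_digest(:murmur2)
murmur.hash("some-key") % 6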
data/lib/kafka/fetcher.rb
CHANGED
@@ -4,7 +4,7 @@ require "kafka/fetch_operation"

 module Kafka
   class Fetcher
-    attr_reader :queue
+    attr_reader :queue, :max_wait_time

     def initialize(cluster:, logger:, instrumenter:, max_queue_size:, group:)
       @cluster = cluster
@@ -17,6 +17,9 @@ module Kafka
       @commands = Queue.new
       @next_offsets = Hash.new { |h, k| h[k] = {} }

+      # We are only running when someone calls start.
+      @running = false
+
       # Long poll until at least this many bytes can be fetched.
       @min_bytes = 1

@@ -110,7 +113,7 @@ module Kafka
       elsif @queue.size < @max_queue_size
         step
       else
-        @logger.warn "Reached max fetcher queue size (#{@max_queue_size}), sleeping 1s"
+        @logger.info "Reached max fetcher queue size (#{@max_queue_size}), sleeping 1s"
         sleep 1
       end
     ensure
data/lib/kafka/interceptors.rb
ADDED
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module Kafka
+  # Holds a list of interceptors that implement `call`
+  # and wraps calls to a chain of custom interceptors.
+  class Interceptors
+    def initialize(interceptors:, logger:)
+      @interceptors = interceptors || []
+      @logger = TaggedLogger.new(logger)
+    end
+
+    # This method is called when the client produces a message or once the batches are fetched.
+    # The message returned from the first call is passed to the second interceptor call, and so on in an
+    # interceptor chain. This method does not throw exceptions.
+    #
+    # @param intercepted [Kafka::PendingMessage || Kafka::FetchedBatch] the produced message or
+    #   fetched batch.
+    #
+    # @return [Kafka::PendingMessage || Kafka::FetchedBatch] the intercepted message or batch
+    #   returned by the last interceptor.
+    def call(intercepted)
+      @interceptors.each do |interceptor|
+        begin
+          intercepted = interceptor.call(intercepted)
+        rescue Exception => e
+          @logger.warn "Error executing interceptor for topic: #{intercepted.topic} partition: #{intercepted.partition}: #{e.message}\n#{e.backtrace.join("\n")}"
+        end
+      end

+      intercepted
+    end
+  end
+end
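An interceptor is any object whose call method returns the (possibly replaced) message or batch; exceptions are logged and swallowed, so a faulty interceptor cannot break produce or consume. A producer-side sketch (the header name is arbitrary; a new PendingMessage is built rather than mutating the incoming one):

require "kafka"
require "securerandom"

class TracingInterceptor
  # Receives a Kafka::PendingMessage and must return one.
  def call(message)
    Kafka::PendingMessage.new(
      value: message.value,
      key: message.key,
      headers: message.headers.merge("trace-id" => SecureRandom.uuid),
      topic: message.topic,
      partition: message.partition,
      partition_key: message.partition_key,
      create_time: message.create_time
    )
  end
end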
data/lib/kafka/murmur2_hash.rb
ADDED
@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+module Kafka
+  class Murmur2Hash
+    SEED = [0x9747b28c].pack('L')
+
+    def load
+      require 'digest/murmurhash'
+    rescue LoadError
+      raise LoadError, "using murmur2 hashing requires adding a dependency on the `digest-murmurhash` gem to your Gemfile."
+    end
+
+    def hash(value)
+      ::Digest::MurmurHash2.rawdigest(value, SEED) & 0x7fffffff
+    end
+  end
+end
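Murmur2 partitioning matches the hashing used by the Java client's default partitioner for keyed messages, so a Ruby producer can co-partition keys with JVM producers. The digest-murmurhash gem must be added explicitly, and the partitioner: option on Kafka.new is assumed to be wired through as in the 1.x README:

# Gemfile
gem "digest-murmurhash"

# application code
kafka = Kafka.new(
  ["kafka1:9092"],
  client_id: "my-app",
  partitioner: Kafka::Partitioner.new(hash_function: :murmur2)
)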
data/lib/kafka/offset_manager.rb
CHANGED
@@ -50,9 +50,20 @@ module Kafka
     # @param offset [Integer] the offset of the message that should be marked as processed.
     # @return [nil]
     def mark_as_processed(topic, partition, offset)
-      @uncommitted_offsets += 1
+      unless @group.assigned_to?(topic, partition)
+        @logger.debug "Not marking #{topic}/#{partition}:#{offset} as processed for partition not assigned to this consumer."
+        return
+      end
       @processed_offsets[topic] ||= {}

+      last_processed_offset = @processed_offsets[topic][partition] || -1
+      if last_processed_offset > offset + 1
+        @logger.debug "Not overwriting newer offset #{topic}/#{partition}:#{last_processed_offset - 1} with older #{offset}"
+        return
+      end
+
+      @uncommitted_offsets += 1
+
       # The committed offset should always be the offset of the next message that the
       # application will read, thus adding one to the last message processed.
       @processed_offsets[topic][partition] = offset + 1
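The +1 convention matters for manual commits: the stored offset is the next message to read, not the last one processed, and with the new guards a mark for an older message (or a revoked partition) is ignored instead of rewinding the group. A sketch with automatic marking turned off (handle is a hypothetical application method):

consumer.each_message(automatically_mark_as_processed: false) do |message|
  handle(message)

  # Stores message.offset + 1, so a restarted consumer resumes at the
  # first unprocessed message.
  consumer.mark_message_as_processed(message)
end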
data/lib/kafka/partitioner.rb
CHANGED
@@ -1,11 +1,16 @@
 # frozen_string_literal: true

-require "zlib"
+require "kafka/digest"

 module Kafka

   # Assigns partitions to messages.
   class Partitioner
+    # @param hash_function [Symbol, nil] the algorithm used to compute a messages
+    #   destination partition. Default is :crc32
+    def initialize(hash_function: nil)
+      @digest = Digest.find_digest(hash_function || :crc32)
+    end

     # Assigns a partition number based on a partition key. If no explicit
     # partition key is provided, the message key will be used instead.
@@ -19,7 +24,7 @@ module Kafka
     # @param message [Kafka::PendingMessage] the message that should be assigned
     #   a partition.
     # @return [Integer] the partition number.
-    def self.partition_for_key(partition_count, message)
+    def call(partition_count, message)
       raise ArgumentError if partition_count == 0

       # If no explicit partition key is specified we use the message key instead.
@@ -28,7 +33,7 @@ module Kafka
       if key.nil?
         rand(partition_count)
       else
-        Zlib.crc32(key) % partition_count
+        @digest.hash(key) % partition_count
       end
     end
   end
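Because the producer now calls @partitioner.call(partition_count, message), any object responding to call can replace the built-in partitioner. A sketch that routes on a hypothetical tenant prefix in the key (key format and class name are invented; the partitioner: keyword on Kafka.new is assumed as above):

require "zlib"

class TenantPartitioner
  # partition_count: number of partitions in the target topic
  # message:         a Kafka::PendingMessage
  def call(partition_count, message)
    key = message.partition_key || message.key
    return rand(partition_count) if key.nil?

    tenant = key.split(":").first  # keys look like "tenant42:order-7"
    Zlib.crc32(tenant) % partition_count
  end
end

kafka = Kafka.new(["kafka1:9092"], partitioner: TenantPartitioner.new)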
data/lib/kafka/producer.rb
CHANGED
@@ -7,6 +7,7 @@ require "kafka/produce_operation"
 require "kafka/pending_message_queue"
 require "kafka/pending_message"
 require "kafka/compressor"
+require "kafka/interceptors"

 module Kafka
   # Allows sending messages to a Kafka cluster.
@@ -129,7 +130,9 @@ module Kafka
   class Producer
     class AbortTransaction < StandardError; end

-    def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
+    def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:,
+                   required_acks:, max_retries:, retry_backoff:, max_buffer_size:,
+                   max_buffer_bytesize:, partitioner:, interceptors: [])
       @cluster = cluster
       @transaction_manager = transaction_manager
       @logger = TaggedLogger.new(logger)
@@ -141,6 +144,8 @@ module Kafka
       @max_buffer_size = max_buffer_size
       @max_buffer_bytesize = max_buffer_bytesize
       @compressor = compressor
+      @partitioner = partitioner
+      @interceptors = Interceptors.new(interceptors: interceptors, logger: logger)

       # The set of topics that are produced to.
       @target_topics = Set.new
@@ -188,15 +193,18 @@ module Kafka
     # @raise [BufferOverflow] if the maximum buffer size has been reached.
     # @return [nil]
     def produce(value, key: nil, headers: {}, topic:, partition: nil, partition_key: nil, create_time: Time.now)
-      message = PendingMessage.new(
+      # We want to fail fast if `topic` isn't a String
+      topic = topic.to_str
+
+      message = @interceptors.call(PendingMessage.new(
         value: value && value.to_s,
         key: key && key.to_s,
         headers: headers,
-        topic: topic.to_s,
+        topic: topic,
         partition: partition && Integer(partition),
         partition_key: partition_key && partition_key.to_s,
         create_time: create_time
-      )
+      ))

       if buffer_size >= @max_buffer_size
         buffer_overflow topic,
@@ -452,7 +460,7 @@ module Kafka

       if partition.nil?
         partition_count = @cluster.partitions_for(message.topic).count
-        partition = Partitioner.partition_for_key(partition_count, message)
+        partition = @partitioner.call(partition_count, message)
       end

       @buffer.write(
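Putting the producer-side changes together: interceptors run inside produce before buffering, and the topic.to_str fail-fast means a non-String topic now raises instead of being silently coerced with to_s. A usage sketch reusing the TracingInterceptor from the interceptors section (the interceptors: keyword on Client#producer is assumed per the 1.x README):

producer = kafka.producer(interceptors: [TracingInterceptor.new])

producer.produce("hello", topic: "greetings")  # fine
producer.produce("hello", topic: :greetings)   # raises NoMethodError: symbols lack #to_str
producer.deliver_messages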
|